Skip to content

Forecaster Module

Core forecasting classes and utilities.

spotforecast2_safe.forecaster

ForecasterBase

Bases: ABC

Base class for all forecasters in spotforecast2.

All forecasters should specify all the parameters that can be set at the class level in their `__init__`.

Attributes:

Name Type Description
__spotforecast_tags__

Dictionary with forecaster tags that characterize the behavior of the forecaster.

Examples:

To see all abstract methods that need to be implemented:

>>> import inspect
>>> from spotforecast2_safe.forecaster.base import ForecasterBase
>>> [m[0] for m in inspect.getmembers(ForecasterBase, predicate=inspect.isabstract)]
['create_train_X_y', 'fit', 'predict', 'set_params']
Source code in src/spotforecast2_safe/forecaster/base.py
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
class ForecasterBase(ABC):
    """Base class for all forecasters in spotforecast2.

    All forecasters should specify all the parameters that can be set at
    the class level in their __init__.

    Attributes:
        __spotforecast_tags__: Dictionary with forecaster tags that characterize
            the behavior of the forecaster.

    Examples:
        To see all abstract methods that need to be implemented:

        >>> import inspect
        >>> from spotforecast2_safe.forecaster.base import ForecasterBase
        >>> [m[0] for m in inspect.getmembers(ForecasterBase, predicate=inspect.isabstract)]
        ['create_train_X_y', 'fit', 'predict', 'set_params']
    """

    def _preprocess_repr(
        self,
        estimator: object | None = None,
        training_range_: dict[str, str] | None = None,
        series_names_in_: list[str] | None = None,
        exog_names_in_: list[str] | None = None,
        transformer_series: object | dict[str, object] | None = None,
    ) -> tuple[str | None, str | None, str | None, str | None, str | None]:
        """Prepare the information to be displayed when a Forecaster object is printed.

        Args:
            estimator: Estimator object. Default is None.
            training_range_: Training range. Only used for ForecasterRecursiveMultiSeries.
                Default is None.
            series_names_in_: Names of the series used in the forecaster.
                Only used for ForecasterRecursiveMultiSeries. Default is None.
            exog_names_in_: Names of the exogenous variables used in the forecaster.
                Default is None.
            transformer_series: Transformer used in the series.
                Only used for ForecasterRecursiveMultiSeries. Default is None.

        Returns:
            Tuple containing params (estimator parameters string), training_range_
            (training range string representation), series_names_in_ (series names
            string representation), exog_names_in_ (exogenous variable names string
            representation), and transformer_series (transformer string representation).
            Each element is None when the corresponding input is None.

        Examples:
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> from sklearn.linear_model import Ridge
            >>> estimator = Ridge(alpha=0.5)
            >>> forecaster = ForecasterRecursive(estimator=estimator, lags=3)
            >>> params, tr, sn, en, ts = forecaster._preprocess_repr(estimator=estimator)
            >>> params
            "{'alpha': 0.5, 'copy_X': True, 'fit_intercept': True, 'max_iter': None, 'positive': False, 'random_state': None, 'solver': 'auto', 'tol': 0.0001}"
        """

        if estimator is not None:
            if isinstance(estimator, Pipeline):
                # Keep only step-level parameters ("stepname__param") so the
                # repr does not show the pipeline's own meta-parameters.
                name_pipe_steps = tuple(
                    name + "__" for name in estimator.named_steps.keys()
                )
                params = {
                    key: value
                    for key, value in estimator.get_params().items()
                    if key.startswith(name_pipe_steps)
                }
            else:
                params = estimator.get_params()
            params = str(params)
        else:
            params = None

        if training_range_ is not None:
            training_range_ = [
                f"'{k}': {v.astype(str).to_list()}" for k, v in training_range_.items()
            ]
            # Truncate long listings to keep the repr readable.
            if len(training_range_) > 10:
                training_range_ = training_range_[:5] + ["..."] + training_range_[-5:]
            training_range_ = ", ".join(training_range_)

        if series_names_in_ is not None:
            if len(series_names_in_) > 50:
                series_names_in_ = (
                    series_names_in_[:25] + ["..."] + series_names_in_[-25:]
                )
            series_names_in_ = ", ".join(series_names_in_)

        if exog_names_in_ is not None:
            if len(exog_names_in_) > 50:
                exog_names_in_ = exog_names_in_[:25] + ["..."] + exog_names_in_[-25:]
            exog_names_in_ = ", ".join(exog_names_in_)

        if transformer_series is not None:
            if isinstance(transformer_series, dict):
                transformer_series = [
                    f"'{k}': {v}" for k, v in transformer_series.items()
                ]
                if len(transformer_series) > 10:
                    transformer_series = (
                        transformer_series[:5] + ["..."] + transformer_series[-5:]
                    )
                transformer_series = ", ".join(transformer_series)
            else:
                transformer_series = str(transformer_series)

        return (
            params,
            training_range_,
            series_names_in_,
            exog_names_in_,
            transformer_series,
        )

    def _format_text_repr(
        self,
        text: str | None,
        max_text_length: int = 58,
        width: int = 80,
        indent: str = "    ",
    ) -> str | None:
        """Format text for __repr__ method.

        Args:
            text: Text to format. If None or short enough, it is returned unchanged.
            max_text_length: Maximum length of the text before wrapping. Default is 58.
            width: Maximum width of the text. Default is 80.
            indent: Indentation of the text. Default is four spaces.

        Returns:
            Formatted text string with proper wrapping and indentation, or the
            input unchanged when it is None or does not exceed max_text_length.

        Examples:
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> from sklearn.linear_model import Ridge
            >>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
            >>> forecaster._format_text_repr("Short text")
            'Short text'
        """

        if text is not None and len(text) > max_text_length:
            text = "\n    " + textwrap.fill(
                str(text), width=width, subsequent_indent=indent
            )

        return text

    @abstractmethod
    def create_train_X_y(
        self, y: pd.Series, exog: pd.Series | pd.DataFrame | None = None
    ) -> tuple[pd.DataFrame, pd.Series]:
        """Create training matrices from univariate time series and exogenous variables.

        Args:
            y: Training time series.
            exog: Exogenous variable(s) included as predictor(s). Must have the same
                number of observations as y and their indexes must be aligned.
                Default is None.

        Returns:
            Tuple containing X_train (training values/predictors with shape
            (len(y) - max_lag, len(lags))) and y_train (target values of the
            time series related to each row of X_train with shape (len(y) - max_lag,)).

        Examples:
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> from sklearn.linear_model import Ridge
            >>> import pandas as pd
            >>> import numpy as np
            >>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
            >>> y = pd.Series(np.arange(10), name='y')
            >>> X_train, y_train = forecaster.create_train_X_y(y)
            >>> X_train.head(2)
               lag_1  lag_2  lag_3
            3    2.0    1.0    0.0
            4    3.0    2.0    1.0
            >>> y_train.head(2)
            3    3
            4    4
            Name: y, dtype: int64
        """

        pass

    @abstractmethod
    def fit(self, y: pd.Series, exog: pd.Series | pd.DataFrame | None = None) -> None:
        """Training Forecaster.

        Args:
            y: Training time series.
            exog: Exogenous variable(s) included as predictor(s). Must have the same
                number of observations as y and their indexes must be aligned so
                that y[i] is regressed on exog[i]. Default is None.

        Returns:
            None

        Examples:
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> from sklearn.linear_model import Ridge
            >>> import pandas as pd
            >>> import numpy as np
            >>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
            >>> y = pd.Series(np.arange(10), name='y')
            >>> forecaster.fit(y)
            >>> forecaster.is_fitted
            True
        """

        pass

    @abstractmethod
    def predict(
        self,
        steps: int,
        last_window: pd.Series | pd.DataFrame | None = None,
        exog: pd.Series | pd.DataFrame | None = None,
    ) -> pd.Series:
        """Predict n steps ahead.

        Args:
            steps: Number of steps to predict.
            last_window: Series values used to create the predictors (lags) needed in the
                first iteration of the prediction (t + 1). If None, the values stored in
                last_window are used to calculate the initial predictors, and the
                predictions start right after training data. Default is None.
            exog: Exogenous variable(s) included as predictor(s). Default is None.

        Returns:
            Predicted values as a pandas Series.

        Examples:
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> from sklearn.linear_model import Ridge
            >>> import pandas as pd
            >>> import numpy as np
            >>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
            >>> y = pd.Series(np.arange(10), name='y')
            >>> forecaster.fit(y)
            >>> forecaster.predict(steps=3)
            10    9.5
            11    9.0
            12    8.5
            Name: pred, dtype: float64
        """

        pass

    @abstractmethod
    def set_params(self, params: dict[str, object]) -> None:
        """Set new values to the parameters of the scikit-learn model stored in the forecaster.

        Args:
            params: Parameters values dictionary.

        Returns:
            None

        Examples:
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> from sklearn.linear_model import Ridge
            >>> forecaster = ForecasterRecursive(estimator=Ridge(alpha=1.0), lags=3)
            >>> forecaster.set_params({'estimator__alpha': 0.5})
            >>> forecaster.estimator.alpha
            0.5
        """

        pass

    def set_lags(
        self, lags: int | list[int] | np.ndarray[int] | range[int] | None = None
    ) -> None:
        """Set new value to the attribute lags.

        Attributes max_lag and window_size are also updated.

        Note:
            Not abstract: subclasses that support lags override this; the base
            implementation is a no-op.

        Args:
            lags: Lags used as predictors. Index starts at 1, so lag 1 is equal to t-1.
                If int: include lags from 1 to lags (included). If list, 1d numpy ndarray,
                or range: include only lags present in lags, all elements must be int.
                If None: no lags are included as predictors. Default is None.

        Returns:
            None

        Examples:
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> from sklearn.linear_model import Ridge
            >>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
            >>> forecaster.set_lags(lags=5)
            >>> forecaster.lags
            array([1, 2, 3, 4, 5])
        """

        pass

    def set_window_features(
        self, window_features: object | list[object] | None = None
    ) -> None:
        """Set new value to the attribute window_features.

        Attributes max_size_window_features, window_features_names,
        window_features_class_names and window_size are also updated.

        Note:
            Not abstract: subclasses that support window features override this;
            the base implementation is a no-op.

        Args:
            window_features: Instance or list of instances used to create window features.
                Window features are created from the original time series and are
                included as predictors. Default is None.

        Returns:
            None

        Examples:
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> from spotforecast2_safe.forecaster.preprocessing import RollingFeatures
            >>> from sklearn.linear_model import Ridge
            >>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
            >>> window_feat = RollingFeatures(stats='mean', window_sizes=3)
            >>> forecaster.set_window_features(window_features=window_feat)
            >>> forecaster.window_features
            [RollingFeatures(stats=['mean'], window_sizes=[3])]
        """

        pass

    def get_tags(self) -> dict[str, Any]:
        """Return the tags that characterize the behavior of the forecaster.

        Returns:
            Dictionary with forecaster tags describing behavior and capabilities.

        Examples:
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> from sklearn.linear_model import Ridge
            >>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
            >>> tags = forecaster.get_tags()
            >>> tags['forecaster_task']
            'regression'
        """

        return self.__spotforecast_tags__

    def summary(self) -> None:
        """Show forecaster information.

        Returns:
            None

        Examples:
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> from sklearn.linear_model import Ridge
            >>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
            >>> forecaster.summary()
            ForecasterRecursive
            ===================
            Estimator: Ridge()
            Lags: [1 2 3]
            ...
        """

        print(repr(self))

    def __setstate__(self, state: dict) -> None:
        """Custom __setstate__ to ensure backward compatibility when unpickling.

        This method is called when an object is unpickled (deserialized).
        It handles the migration of deprecated attributes to their new names.

        Args:
            state: The state dictionary from the pickled object.

        Returns:
            None

        Examples:
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> from sklearn.linear_model import Ridge
            >>> import pickle
            >>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
            >>> pickled_forecaster = pickle.dumps(forecaster)
            >>> unpickled_forecaster = pickle.loads(pickled_forecaster)
        """

        # Migrate the deprecated 'regressor' attribute to 'estimator', but
        # never clobber an 'estimator' that is already present in the state.
        if "regressor" in state and "estimator" not in state:
            state["estimator"] = state.pop("regressor")

        self.__dict__.update(state)

    @property
    def regressor(self) -> Any:
        """Deprecated property. Use estimator instead.

        Returns:
            The estimator object.

        Examples:
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> from sklearn.linear_model import Ridge
            >>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
            >>> forecaster.regressor # Raises FutureWarning
            Ridge()
        """
        warnings.warn(
            "The `regressor` attribute is deprecated and will be removed in future "
            "versions. Use `estimator` instead.",
            FutureWarning,
        )
        return self.estimator

regressor property

Deprecated property. Use estimator instead.

Returns:

Type Description
Any

The estimator object.

Examples:

>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> from sklearn.linear_model import Ridge
>>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
>>> forecaster.regressor # Raises FutureWarning
Ridge()

__setstate__(state)

Custom `__setstate__` to ensure backward compatibility when unpickling.

This method is called when an object is unpickled (deserialized). It handles the migration of deprecated attributes to their new names.

Parameters:

Name Type Description Default
state dict

The state dictionary from the pickled object.

required

Returns:

Type Description
None

None

Examples:

>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> from sklearn.linear_model import Ridge
>>> import pickle
>>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
>>> pickled_forecaster = pickle.dumps(forecaster)
>>> unpickled_forecaster = pickle.loads(pickled_forecaster)
Source code in src/spotforecast2_safe/forecaster/base.py
def __setstate__(self, state: dict) -> None:
    """Custom __setstate__ to ensure backward compatibility when unpickling.

    This method is called when an object is unpickled (deserialized).
    It handles the migration of deprecated attributes to their new names.

    Args:
        state: The state dictionary from the pickled object.

    Returns:
        None

    Examples:
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> from sklearn.linear_model import Ridge
        >>> import pickle
        >>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
        >>> pickled_forecaster = pickle.dumps(forecaster)
        >>> unpickled_forecaster = pickle.loads(pickled_forecaster)
    """

    if "regressor" in state and "estimator" not in state:
        state["estimator"] = state.pop("regressor")

    self.__dict__.update(state)

create_train_X_y(y, exog=None) abstractmethod

Create training matrices from univariate time series and exogenous variables.

Parameters:

Name Type Description Default
y Series

Training time series.

required
exog Series | DataFrame | None

Exogenous variable(s) included as predictor(s). Must have the same number of observations as y and their indexes must be aligned. Default is None.

None

Returns:

Type Description
DataFrame

Tuple containing X_train (training values/predictors with shape

Series

(len(y) - max_lag, len(lags))) and y_train (target values of the

tuple[DataFrame, Series]

time series related to each row of X_train with shape (len(y) - max_lag,)).

Examples:

>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> from sklearn.linear_model import Ridge
>>> import pandas as pd
>>> import numpy as np
>>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
>>> y = pd.Series(np.arange(10), name='y')
>>> X_train, y_train = forecaster.create_train_X_y(y)
>>> X_train.head(2)
   lag_1  lag_2  lag_3
3    2.0    1.0    0.0
4    3.0    2.0    1.0
>>> y_train.head(2)
3    3
4    4
Name: y, dtype: int64
Source code in src/spotforecast2_safe/forecaster/base.py
@abstractmethod
def create_train_X_y(
    self, y: pd.Series, exog: pd.Series | pd.DataFrame | None = None
) -> tuple[pd.DataFrame, pd.Series]:
    """Create training matrices from univariate time series and exogenous variables.

    Args:
        y: Training time series.
        exog: Exogenous variable(s) included as predictor(s). Must have the same
            number of observations as y and their indexes must be aligned.
            Default is None.

    Returns:
        Tuple containing X_train (training values/predictors with shape
        (len(y) - max_lag, len(lags))) and y_train (target values of the
        time series related to each row of X_train with shape (len(y) - max_lag,)).

    Examples:
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> from sklearn.linear_model import Ridge
        >>> import pandas as pd
        >>> import numpy as np
        >>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
        >>> y = pd.Series(np.arange(10), name='y')
        >>> X_train, y_train = forecaster.create_train_X_y(y)
        >>> X_train.head(2)
           lag_1  lag_2  lag_3
        3    2.0    1.0    0.0
        4    3.0    2.0    1.0
        >>> y_train.head(2)
        3    3
        4    4
        Name: y, dtype: int64
    """

    pass

fit(y, exog=None) abstractmethod

Training Forecaster.

Parameters:

Name Type Description Default
y Series

Training time series.

required
exog Series | DataFrame | None

Exogenous variable(s) included as predictor(s). Must have the same number of observations as y and their indexes must be aligned so that y[i] is regressed on exog[i]. Default is None.

None

Returns:

Type Description
None

None

Examples:

>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> from sklearn.linear_model import Ridge
>>> import pandas as pd
>>> import numpy as np
>>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
>>> y = pd.Series(np.arange(10), name='y')
>>> forecaster.fit(y)
>>> forecaster.is_fitted
True
Source code in src/spotforecast2_safe/forecaster/base.py
@abstractmethod
def fit(self, y: pd.Series, exog: pd.Series | pd.DataFrame | None = None) -> None:
    """Training Forecaster.

    Args:
        y: Training time series.
        exog: Exogenous variable(s) included as predictor(s). Must have the same
            number of observations as y and their indexes must be aligned so
            that y[i] is regressed on exog[i]. Default is None.

    Returns:
        None

    Examples:
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> from sklearn.linear_model import Ridge
        >>> import pandas as pd
        >>> import numpy as np
        >>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
        >>> y = pd.Series(np.arange(10), name='y')
        >>> forecaster.fit(y)
        >>> forecaster.is_fitted
        True
    """

    pass

get_tags()

Return the tags that characterize the behavior of the forecaster.

Returns:

Type Description
dict[str, Any]

Dictionary with forecaster tags describing behavior and capabilities.

Examples:

>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> from sklearn.linear_model import Ridge
>>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
>>> tags = forecaster.get_tags()
>>> tags['forecaster_task']
'regression'
Source code in src/spotforecast2_safe/forecaster/base.py
def get_tags(self) -> dict[str, Any]:
    """Return the tags that characterize the behavior of the forecaster.

    Returns:
        Dictionary with forecaster tags describing behavior and capabilities.

    Examples:
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> from sklearn.linear_model import Ridge
        >>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
        >>> tags = forecaster.get_tags()
        >>> tags['forecaster_task']
        'regression'
    """

    return self.__spotforecast_tags__

predict(steps, last_window=None, exog=None) abstractmethod

Predict n steps ahead.

Parameters:

Name Type Description Default
steps int

Number of steps to predict.

required
last_window Series | DataFrame | None

Series values used to create the predictors (lags) needed in the first iteration of the prediction (t + 1). If None, the values stored in last_window are used to calculate the initial predictors, and the predictions start right after training data. Default is None.

None
exog Series | DataFrame | None

Exogenous variable(s) included as predictor(s). Default is None.

None

Returns:

Type Description
Series

Predicted values as a pandas Series.

Examples:

>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> from sklearn.linear_model import Ridge
>>> import pandas as pd
>>> import numpy as np
>>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
>>> y = pd.Series(np.arange(10), name='y')
>>> forecaster.fit(y)
>>> forecaster.predict(steps=3)
10    9.5
11    9.0
12    8.5
Name: pred, dtype: float64
Source code in src/spotforecast2_safe/forecaster/base.py
@abstractmethod
def predict(
    self,
    steps: int,
    last_window: pd.Series | pd.DataFrame | None = None,
    exog: pd.Series | pd.DataFrame | None = None,
) -> pd.Series:
    """Predict n steps ahead.

    Args:
        steps: Number of steps to predict.
        last_window: Series values used to create the predictors (lags) needed in the
            first iteration of the prediction (t + 1). If None, the values stored in
            last_window are used to calculate the initial predictors, and the
            predictions start right after training data. Default is None.
        exog: Exogenous variable(s) included as predictor(s). Default is None.

    Returns:
        Predicted values as a pandas Series.

    Examples:
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> from sklearn.linear_model import Ridge
        >>> import pandas as pd
        >>> import numpy as np
        >>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
        >>> y = pd.Series(np.arange(10), name='y')
        >>> forecaster.fit(y)
        >>> forecaster.predict(steps=3)
        10    9.5
        11    9.0
        12    8.5
        Name: pred, dtype: float64
    """

    pass

set_lags(lags=None)

Set new value to the attribute lags.

Attributes max_lag and window_size are also updated.

Parameters:

Name Type Description Default
lags int | list[int] | ndarray[int] | range[int] | None

Lags used as predictors. Index starts at 1, so lag 1 is equal to t-1. If int: include lags from 1 to lags (included). If list, 1d numpy ndarray, or range: include only lags present in lags, all elements must be int. If None: no lags are included as predictors. Default is None.

None

Returns:

Type Description
None

None

Examples:

>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> from sklearn.linear_model import Ridge
>>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
>>> forecaster.set_lags(lags=5)
>>> forecaster.lags
array([1, 2, 3, 4, 5])
Source code in src/spotforecast2_safe/forecaster/base.py
def set_lags(
    self, lags: int | list[int] | np.ndarray[int] | range[int] | None = None
) -> None:
    """Set new value to the attribute lags.

    Attributes max_lag and window_size are also updated.

    Args:
        lags: Lags used as predictors. Index starts at 1, so lag 1 is equal to t-1.
            If int: include lags from 1 to lags (included). If list, 1d numpy ndarray,
            or range: include only lags present in lags, all elements must be int.
            If None: no lags are included as predictors. Default is None.

    Returns:
        None

    Examples:
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> from sklearn.linear_model import Ridge
        >>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
        >>> forecaster.set_lags(lags=5)
        >>> forecaster.lags
        array([1, 2, 3, 4, 5])
    """

    pass

set_params(params) abstractmethod

Set new values to the parameters of the scikit-learn model stored in the forecaster.

Parameters:

Name Type Description Default
params dict[str, object]

Parameters values dictionary.

required

Returns:

Type Description
None

None

Examples:

>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> from sklearn.linear_model import Ridge
>>> forecaster = ForecasterRecursive(estimator=Ridge(alpha=1.0), lags=3)
>>> forecaster.set_params({'estimator__alpha': 0.5})
>>> forecaster.estimator.alpha
0.5
Source code in src/spotforecast2_safe/forecaster/base.py
@abstractmethod
def set_params(self, params: dict[str, object]) -> None:
    """Set new values to the parameters of the scikit-learn model stored in the forecaster.

    Args:
        params: Parameters values dictionary.

    Returns:
        None

    Examples:
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> from sklearn.linear_model import Ridge
        >>> forecaster = ForecasterRecursive(estimator=Ridge(alpha=1.0), lags=3)
        >>> forecaster.set_params({'estimator__alpha': 0.5})
        >>> forecaster.estimator.alpha
        0.5
    """

    pass

set_window_features(window_features=None)

Set new value to the attribute window_features.

Attributes max_size_window_features, window_features_names, window_features_class_names and window_size are also updated.

Parameters:

Name Type Description Default
window_features object | list[object] | None

Instance or list of instances used to create window features. Window features are created from the original time series and are included as predictors. Default is None.

None

Returns:

Type Description
None

None

Examples:

>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> from spotforecast2_safe.forecaster.preprocessing import RollingFeatures
>>> from sklearn.linear_model import Ridge
>>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
>>> window_feat = RollingFeatures(stats='mean', window_sizes=3)
>>> forecaster.set_window_features(window_features=window_feat)
>>> forecaster.window_features
[RollingFeatures(stats=['mean'], window_sizes=[3])]
Source code in src/spotforecast2_safe/forecaster/base.py
def set_window_features(
    self, window_features: object | list[object] | None = None
) -> None:
    """Set new value to the attribute window_features.

    Attributes max_size_window_features, window_features_names,
    window_features_class_names and window_size are also updated.

    Args:
        window_features: Instance or list of instances used to create window features.
            Window features are created from the original time series and are
            included as predictors. Default is None.

    Returns:
        None

    Examples:
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> from spotforecast2_safe.forecaster.preprocessing import RollingFeatures
        >>> from sklearn.linear_model import Ridge
        >>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
        >>> window_feat = RollingFeatures(stats='mean', window_sizes=3)
        >>> forecaster.set_window_features(window_features=window_feat)
        >>> forecaster.window_features
        [RollingFeatures(stats=['mean'], window_sizes=[3])]
    """

    pass

summary()

Show forecaster information.

Returns:

Type Description
None

None

Examples:

>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> from sklearn.linear_model import Ridge
>>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
>>> forecaster.summary()
ForecasterRecursive
===================
Estimator: Ridge()
Lags: [1 2 3]
...
Source code in src/spotforecast2_safe/forecaster/base.py
def summary(self) -> None:
    """Show forecaster information.

    Prints the forecaster's string representation to stdout.

    Returns:
        None

    Examples:
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> from sklearn.linear_model import Ridge
        >>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
        >>> forecaster.summary()
        ForecasterRecursive
        ===================
        Estimator: Ridge()
        Lags: [1 2 3]
        ...
    """

    # Use the builtin repr() rather than invoking __repr__ directly:
    # same result, idiomatic, and respects any type-level customization.
    print(repr(self))

ForecasterRecursive

Bases: ForecasterBase

Recursive autoregressive forecaster for scikit-learn compatible estimators.

This class turns any estimator compatible with the scikit-learn API into a recursive autoregressive (multi-step) forecaster. The forecaster learns to predict future values by using lagged values of the target variable and optional exogenous features. Predictions are made iteratively, where each step uses previous predictions as input for the next step (recursive strategy).

Parameters:

Name Type Description Default
estimator object

Scikit-learn compatible estimator for regression. If None, a default estimator will be initialized. Can also be passed via regressor parameter.

None
lags Union[int, List[int], ndarray, range, None]

Lagged values of the target variable to use as predictors. Can be an integer (uses lags from 1 to lags), list of integers, numpy array, or range. At least one of lags or window_features must be provided. Defaults to None.

None
window_features Union[object, List[object], None]

List of window feature objects to compute features from the target variable. Each object must implement transform_batch() method. At least one of lags or window_features must be provided. Defaults to None.

None
transformer_y Optional[object]

Transformer object for the target variable. Must implement fit() and transform() methods. Applied before training and predictions. Defaults to None.

None
transformer_exog Optional[object]

Transformer object for exogenous variables. Must implement fit() and transform() methods. Applied before training and predictions. Defaults to None.

None
weight_func Optional[Callable]

Function to compute sample weights for training. Must accept an index and return an array of weights. Defaults to None.

None
differentiation Optional[int]

Order of differencing to apply to the target variable. Must be a positive integer. Differencing is applied before creating lags. Defaults to None.

None
fit_kwargs Optional[Dict[str, object]]

Dictionary of additional keyword arguments to pass to the estimator's fit() method. Defaults to None.

None
binner_kwargs Optional[Dict[str, object]]

Dictionary of keyword arguments for QuantileBinner used in probabilistic predictions. Defaults to {'n_bins': 10, 'method': 'linear'}.

None
forecaster_id Union[str, int, None]

Identifier for the forecaster instance. Can be a string or integer. Used for tracking and logging purposes. Defaults to None.

None
regressor object

Alternative parameter name for estimator. If provided, used instead of estimator. Defaults to None.

None

Attributes:

Name Type Description
estimator

Fitted scikit-learn estimator.

lags

Lag indices used in the model.

lags_names

Names of lag features (e.g., ['lag_1', 'lag_2']).

window_features

List of window feature transformers.

window_features_names

Names of window features.

window_size

Maximum window size needed (max of lags and window features).

transformer_y

Transformer for target variable.

transformer_exog

Transformer for exogenous variables.

weight_func

Function for sample weighting.

differentiation

Order of differencing applied.

differentiator

TimeSeriesDifferentiator instance if differencing is used.

is_fitted

Boolean indicating if forecaster has been fitted.

fit_date

Timestamp of the last fit operation.

last_window_

Last window_size observations from training data.

index_type_

Type of index in training data (RangeIndex or DatetimeIndex).

index_freq_

Frequency of DatetimeIndex if applicable.

training_range_

First and last index values of training data.

series_name_in_

Name of the target series.

exog_in_

Boolean indicating if exogenous variables were used in training.

exog_names_in_

Names of exogenous variables.

exog_type_in_

Type of exogenous input (Series or DataFrame).

X_train_features_names_out_

Names of all training features.

in_sample_residuals_

Residuals from training set.

in_sample_residuals_by_bin_

Residuals grouped by bins for probabilistic prediction.

forecaster_id

Identifier for the forecaster instance.

Note
  • Either lags or window_features (or both) must be provided during initialization.
  • The forecaster uses a recursive strategy where each multi-step prediction depends on previous predictions within the same forecast horizon.
  • Exogenous variables must have the same index as the target variable and must be available for the entire prediction horizon.
  • The forecaster supports point predictions, prediction intervals, bootstrapping, quantile predictions, and probabilistic forecasts via conformal methods.

Examples:

Create a basic forecaster with lags:

>>> import numpy as np
>>> import pandas as pd
>>> from sklearn.linear_model import LinearRegression
>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> y = pd.Series(np.random.randn(100), name='y')
>>> forecaster = ForecasterRecursive(
...     estimator=LinearRegression(),
...     lags=10
... )
>>> forecaster.fit(y)
>>> predictions = forecaster.predict(steps=5)

Create a forecaster with window features and transformations:

>>> from sklearn.ensemble import RandomForestRegressor
>>> from sklearn.preprocessing import StandardScaler
>>> from spotforecast2_safe.forecaster.preprocessing import RollingFeatures
>>> import pandas as pd
>>> y = pd.Series(np.random.randn(100), name='y')
>>> forecaster = ForecasterRecursive(
...     estimator=RandomForestRegressor(n_estimators=100),
...     lags=[1, 7, 30],
...     window_features=[RollingFeatures(stats='mean', window_sizes=7)],
...     transformer_y=StandardScaler(),
...     differentiation=1
... )
>>> forecaster.fit(y)
>>> predictions = forecaster.predict(steps=10)

Create a forecaster with exogenous variables:

>>> import pandas as pd
>>> from sklearn.linear_model import Ridge
>>> y = pd.Series(np.random.randn(100), name='target')
>>> exog = pd.DataFrame({'temp': np.random.randn(100)}, index=y.index)
>>> forecaster = ForecasterRecursive(
...     estimator=Ridge(),
...     lags=7,
...     forecaster_id='my_forecaster'
... )
>>> forecaster.fit(y, exog)
>>> exog_future = pd.DataFrame(
...     {'temp': np.random.randn(5)},
...     index=pd.RangeIndex(start=100, stop=105)
... )
>>> predictions = forecaster.predict(steps=5, exog=exog_future)

Create a forecaster with probabilistic prediction configuration:

>>> from sklearn.ensemble import GradientBoostingRegressor
>>> import pandas as pd
>>> y = pd.Series(np.random.randn(100), name='y')
>>> forecaster = ForecasterRecursive(
...     estimator=GradientBoostingRegressor(),
...     lags=14,
...     binner_kwargs={'n_bins': 15, 'method': 'linear'}
... )
>>> forecaster.fit(y, store_in_sample_residuals=True)
>>> predictions = forecaster.predict(steps=5)
Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
  52
  53
  54
  55
  56
  57
  58
  59
  60
  61
  62
  63
  64
  65
  66
  67
  68
  69
  70
  71
  72
  73
  74
  75
  76
  77
  78
  79
  80
  81
  82
  83
  84
  85
  86
  87
  88
  89
  90
  91
  92
  93
  94
  95
  96
  97
  98
  99
 100
 101
 102
 103
 104
 105
 106
 107
 108
 109
 110
 111
 112
 113
 114
 115
 116
 117
 118
 119
 120
 121
 122
 123
 124
 125
 126
 127
 128
 129
 130
 131
 132
 133
 134
 135
 136
 137
 138
 139
 140
 141
 142
 143
 144
 145
 146
 147
 148
 149
 150
 151
 152
 153
 154
 155
 156
 157
 158
 159
 160
 161
 162
 163
 164
 165
 166
 167
 168
 169
 170
 171
 172
 173
 174
 175
 176
 177
 178
 179
 180
 181
 182
 183
 184
 185
 186
 187
 188
 189
 190
 191
 192
 193
 194
 195
 196
 197
 198
 199
 200
 201
 202
 203
 204
 205
 206
 207
 208
 209
 210
 211
 212
 213
 214
 215
 216
 217
 218
 219
 220
 221
 222
 223
 224
 225
 226
 227
 228
 229
 230
 231
 232
 233
 234
 235
 236
 237
 238
 239
 240
 241
 242
 243
 244
 245
 246
 247
 248
 249
 250
 251
 252
 253
 254
 255
 256
 257
 258
 259
 260
 261
 262
 263
 264
 265
 266
 267
 268
 269
 270
 271
 272
 273
 274
 275
 276
 277
 278
 279
 280
 281
 282
 283
 284
 285
 286
 287
 288
 289
 290
 291
 292
 293
 294
 295
 296
 297
 298
 299
 300
 301
 302
 303
 304
 305
 306
 307
 308
 309
 310
 311
 312
 313
 314
 315
 316
 317
 318
 319
 320
 321
 322
 323
 324
 325
 326
 327
 328
 329
 330
 331
 332
 333
 334
 335
 336
 337
 338
 339
 340
 341
 342
 343
 344
 345
 346
 347
 348
 349
 350
 351
 352
 353
 354
 355
 356
 357
 358
 359
 360
 361
 362
 363
 364
 365
 366
 367
 368
 369
 370
 371
 372
 373
 374
 375
 376
 377
 378
 379
 380
 381
 382
 383
 384
 385
 386
 387
 388
 389
 390
 391
 392
 393
 394
 395
 396
 397
 398
 399
 400
 401
 402
 403
 404
 405
 406
 407
 408
 409
 410
 411
 412
 413
 414
 415
 416
 417
 418
 419
 420
 421
 422
 423
 424
 425
 426
 427
 428
 429
 430
 431
 432
 433
 434
 435
 436
 437
 438
 439
 440
 441
 442
 443
 444
 445
 446
 447
 448
 449
 450
 451
 452
 453
 454
 455
 456
 457
 458
 459
 460
 461
 462
 463
 464
 465
 466
 467
 468
 469
 470
 471
 472
 473
 474
 475
 476
 477
 478
 479
 480
 481
 482
 483
 484
 485
 486
 487
 488
 489
 490
 491
 492
 493
 494
 495
 496
 497
 498
 499
 500
 501
 502
 503
 504
 505
 506
 507
 508
 509
 510
 511
 512
 513
 514
 515
 516
 517
 518
 519
 520
 521
 522
 523
 524
 525
 526
 527
 528
 529
 530
 531
 532
 533
 534
 535
 536
 537
 538
 539
 540
 541
 542
 543
 544
 545
 546
 547
 548
 549
 550
 551
 552
 553
 554
 555
 556
 557
 558
 559
 560
 561
 562
 563
 564
 565
 566
 567
 568
 569
 570
 571
 572
 573
 574
 575
 576
 577
 578
 579
 580
 581
 582
 583
 584
 585
 586
 587
 588
 589
 590
 591
 592
 593
 594
 595
 596
 597
 598
 599
 600
 601
 602
 603
 604
 605
 606
 607
 608
 609
 610
 611
 612
 613
 614
 615
 616
 617
 618
 619
 620
 621
 622
 623
 624
 625
 626
 627
 628
 629
 630
 631
 632
 633
 634
 635
 636
 637
 638
 639
 640
 641
 642
 643
 644
 645
 646
 647
 648
 649
 650
 651
 652
 653
 654
 655
 656
 657
 658
 659
 660
 661
 662
 663
 664
 665
 666
 667
 668
 669
 670
 671
 672
 673
 674
 675
 676
 677
 678
 679
 680
 681
 682
 683
 684
 685
 686
 687
 688
 689
 690
 691
 692
 693
 694
 695
 696
 697
 698
 699
 700
 701
 702
 703
 704
 705
 706
 707
 708
 709
 710
 711
 712
 713
 714
 715
 716
 717
 718
 719
 720
 721
 722
 723
 724
 725
 726
 727
 728
 729
 730
 731
 732
 733
 734
 735
 736
 737
 738
 739
 740
 741
 742
 743
 744
 745
 746
 747
 748
 749
 750
 751
 752
 753
 754
 755
 756
 757
 758
 759
 760
 761
 762
 763
 764
 765
 766
 767
 768
 769
 770
 771
 772
 773
 774
 775
 776
 777
 778
 779
 780
 781
 782
 783
 784
 785
 786
 787
 788
 789
 790
 791
 792
 793
 794
 795
 796
 797
 798
 799
 800
 801
 802
 803
 804
 805
 806
 807
 808
 809
 810
 811
 812
 813
 814
 815
 816
 817
 818
 819
 820
 821
 822
 823
 824
 825
 826
 827
 828
 829
 830
 831
 832
 833
 834
 835
 836
 837
 838
 839
 840
 841
 842
 843
 844
 845
 846
 847
 848
 849
 850
 851
 852
 853
 854
 855
 856
 857
 858
 859
 860
 861
 862
 863
 864
 865
 866
 867
 868
 869
 870
 871
 872
 873
 874
 875
 876
 877
 878
 879
 880
 881
 882
 883
 884
 885
 886
 887
 888
 889
 890
 891
 892
 893
 894
 895
 896
 897
 898
 899
 900
 901
 902
 903
 904
 905
 906
 907
 908
 909
 910
 911
 912
 913
 914
 915
 916
 917
 918
 919
 920
 921
 922
 923
 924
 925
 926
 927
 928
 929
 930
 931
 932
 933
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
class ForecasterRecursive(ForecasterBase):
    """
    Recursive autoregressive forecaster for scikit-learn compatible estimators.

    This class turns any estimator compatible with the scikit-learn API into a
    recursive autoregressive (multi-step) forecaster. The forecaster learns to predict
    future values by using lagged values of the target variable and optional exogenous
    features. Predictions are made iteratively, where each step uses previous predictions
    as input for the next step (recursive strategy).

    Args:
        estimator: Scikit-learn compatible estimator for regression. If None, a default
            estimator will be initialized. Can also be passed via regressor parameter.
        lags: Lagged values of the target variable to use as predictors. Can be an
            integer (uses lags from 1 to lags), list of integers, numpy array, or range.
            At least one of lags or window_features must be provided. Defaults to None.
        window_features: List of window feature objects to compute features from the
            target variable. Each object must implement transform_batch() method.
            At least one of lags or window_features must be provided. Defaults to None.
        transformer_y: Transformer object for the target variable. Must implement fit()
            and transform() methods. Applied before training and predictions.
            Defaults to None.
        transformer_exog: Transformer object for exogenous variables. Must implement
            fit() and transform() methods. Applied before training and predictions.
            Defaults to None.
        weight_func: Function to compute sample weights for training. Must accept an
            index and return an array of weights. Defaults to None.
        differentiation: Order of differencing to apply to the target variable.
            Must be a positive integer. Differencing is applied before creating lags.
            Defaults to None.
        fit_kwargs: Dictionary of additional keyword arguments to pass to the estimator's
            fit() method. Defaults to None.
        binner_kwargs: Dictionary of keyword arguments for QuantileBinner used in
            probabilistic predictions. Defaults to {'n_bins': 10, 'method': 'linear'}.
        forecaster_id: Identifier for the forecaster instance. Can be a string or
            integer. Used for tracking and logging purposes. Defaults to None.
        regressor: Alternative parameter name for estimator. If provided, used instead
            of estimator. Defaults to None.

    Attributes:
        estimator: Fitted scikit-learn estimator.
        lags: Lag indices used in the model.
        lags_names: Names of lag features (e.g., ['lag_1', 'lag_2']).
        window_features: List of window feature transformers.
        window_features_names: Names of window features.
        window_size: Maximum window size needed (max of lags and window features).
        transformer_y: Transformer for target variable.
        transformer_exog: Transformer for exogenous variables.
        weight_func: Function for sample weighting.
        differentiation: Order of differencing applied.
        differentiator: TimeSeriesDifferentiator instance if differencing is used.
        is_fitted: Boolean indicating if forecaster has been fitted.
        fit_date: Timestamp of the last fit operation.
        last_window_: Last window_size observations from training data.
        index_type_: Type of index in training data (RangeIndex or DatetimeIndex).
        index_freq_: Frequency of DatetimeIndex if applicable.
        training_range_: First and last index values of training data.
        series_name_in_: Name of the target series.
        exog_in_: Boolean indicating if exogenous variables were used in training.
        exog_names_in_: Names of exogenous variables.
        exog_type_in_: Type of exogenous input (Series or DataFrame).
        X_train_features_names_out_: Names of all training features.
        in_sample_residuals_: Residuals from training set.
        in_sample_residuals_by_bin_: Residuals grouped by bins for probabilistic pred.
        forecaster_id: Identifier for the forecaster instance.

    Note:
        - Either lags or window_features (or both) must be provided during initialization.
        - The forecaster uses a recursive strategy where each multi-step prediction
          depends on previous predictions within the same forecast horizon.
        - Exogenous variables must have the same index as the target variable and must
          be available for the entire prediction horizon.
        - The forecaster supports point predictions, prediction intervals, bootstrapping,
          quantile predictions, and probabilistic forecasts via conformal methods.

    Examples:
        Create a basic forecaster with lags:

        >>> import numpy as np
        >>> import pandas as pd
        >>> from sklearn.linear_model import LinearRegression
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> y = pd.Series(np.random.randn(100), name='y')
        >>> forecaster = ForecasterRecursive(
        ...     estimator=LinearRegression(),
        ...     lags=10
        ... )
        >>> forecaster.fit(y)
        >>> predictions = forecaster.predict(steps=5)

        Create a forecaster with window features and transformations:

        >>> from sklearn.ensemble import RandomForestRegressor
        >>> from sklearn.preprocessing import StandardScaler
        >>> from spotforecast2_safe.preprocessing import RollingFeatures
        >>> import pandas as pd
        >>> y = pd.Series(np.random.randn(100), name='y')
        >>> forecaster = ForecasterRecursive(
        ...     estimator=RandomForestRegressor(n_estimators=100),
        ...     lags=[1, 7, 30],
        ...     window_features=[RollingFeatures(stats='mean', window_sizes=7)],
        ...     transformer_y=StandardScaler(),
        ...     differentiation=1
        ... )
        >>> forecaster.fit(y)
        >>> predictions = forecaster.predict(steps=10)

        Create a forecaster with exogenous variables:

        >>> import pandas as pd
        >>> from sklearn.linear_model import Ridge
        >>> y = pd.Series(np.random.randn(100), name='target')
        >>> exog = pd.DataFrame({'temp': np.random.randn(100)}, index=y.index)
        >>> forecaster = ForecasterRecursive(
        ...     estimator=Ridge(),
        ...     lags=7,
        ...     forecaster_id='my_forecaster'
        ... )
        >>> forecaster.fit(y, exog)
        >>> exog_future = pd.DataFrame(
        ...     {'temp': np.random.randn(5)},
        ...     index=pd.RangeIndex(start=100, stop=105)
        ... )
        >>> predictions = forecaster.predict(steps=5, exog=exog_future)

        Create a forecaster with probabilistic prediction configuration:

        >>> from sklearn.ensemble import GradientBoostingRegressor
        >>> import pandas as pd
        >>> y = pd.Series(np.random.randn(100), name='y')
        >>> forecaster = ForecasterRecursive(
        ...     estimator=GradientBoostingRegressor(),
        ...     lags=14,
        ...     binner_kwargs={'n_bins': 15, 'method': 'linear'}
        ... )
        >>> forecaster.fit(y, store_in_sample_residuals=True)
        >>> predictions = forecaster.predict(steps=5)
    """

    def __init__(
        self,
        estimator: object = None,
        lags: Union[int, List[int], np.ndarray, range, None] = None,
        window_features: Union[object, List[object], None] = None,
        transformer_y: Optional[object] = None,
        transformer_exog: Optional[object] = None,
        weight_func: Optional[Callable] = None,
        differentiation: Optional[int] = None,
        fit_kwargs: Optional[Dict[str, object]] = None,
        binner_kwargs: Optional[Dict[str, object]] = None,
        forecaster_id: Union[str, int, None] = None,
        regressor: object = None,
    ) -> None:
        """Initialize the forecaster. See the class docstring for the meaning
        of each argument.

        Raises:
            ValueError: If both `lags` and `window_features` are None, or if
                `differentiation` is not an integer >= 1.
        """

        # Resolve the model from either `estimator` or the `regressor` alias and
        # keep a private copy so later mutation of the caller's object does not
        # affect this forecaster.
        self.estimator = copy(initialize_estimator(estimator, regressor))
        self.transformer_y = transformer_y
        self.transformer_exog = transformer_exog
        self.weight_func = weight_func
        self.source_code_weight_func = None
        self.differentiation = differentiation
        self.differentiation_max = None
        self.differentiator = None
        self.last_window_ = None
        self.index_type_ = None
        self.index_freq_ = None
        self.training_range_ = None
        self.series_name_in_ = None
        self.exog_in_ = False
        self.exog_names_in_ = None
        self.exog_type_in_ = None
        self.exog_dtypes_in_ = None
        self.exog_dtypes_out_ = None
        self.X_train_window_features_names_out_ = None
        self.X_train_exog_names_out_ = None
        self.X_train_features_names_out_ = None
        self.in_sample_residuals_ = None
        self.out_sample_residuals_ = None
        self.in_sample_residuals_by_bin_ = None
        self.out_sample_residuals_by_bin_ = None
        self.creation_date = pd.Timestamp.today().strftime("%Y-%m-%d %H:%M:%S")
        self.is_fitted = False
        self.fit_date = None
        try:
            from spotforecast2_safe import __version__

            self.spotforecast_version = __version__
        except ImportError:
            self.spotforecast_version = "unknown"
        self.python_version = sys.version.split(" ")[0]
        self.forecaster_id = forecaster_id
        self._probabilistic_mode = "binned"

        (
            self.lags,
            self.lags_names,
            self.max_lag,
        ) = initialize_lags(type(self).__name__, lags)
        (
            self.window_features,
            self.window_features_names,
            self.max_size_window_features,
        ) = initialize_window_features(window_features)
        if self.window_features is None and self.lags is None:
            raise ValueError(
                "At least one of the arguments `lags` or `window_features` "
                "must be different from None. This is required to create the "
                "predictors used in training the forecaster."
            )

        # Largest look-back needed by any predictor (lags and/or window features).
        self.window_size = max(
            [
                ws
                for ws in [self.max_lag, self.max_size_window_features]
                if ws is not None
            ]
        )
        self.window_features_class_names = None
        if window_features is not None:
            self.window_features_class_names = [
                type(wf).__name__ for wf in self.window_features
            ]

        # Use the resolved estimator here: the raw `estimator` argument is None
        # when the model was supplied via the `regressor=` alias.
        self.weight_func, self.source_code_weight_func, _ = initialize_weights(
            forecaster_name=type(self).__name__,
            estimator=self.estimator,
            weight_func=weight_func,
            series_weights=None,
        )

        if differentiation is not None:
            if not isinstance(differentiation, int) or differentiation < 1:
                raise ValueError(
                    f"Argument `differentiation` must be an integer equal to or "
                    f"greater than 1. Got {differentiation}."
                )
            self.differentiation = differentiation
            self.differentiation_max = differentiation
            # Differencing consumes `differentiation` extra leading observations.
            self.window_size += differentiation
            self.differentiator = TimeSeriesDifferentiator(
                order=differentiation  # , window_size=self.window_size # TODO: TimeSeriesDifferentiator in preprocessing created only takes order, add window_size if needed
            )

        # Validate fit kwargs against the resolved estimator, not the raw
        # argument (which may be None when `regressor=` was used).
        self.fit_kwargs = check_select_fit_kwargs(
            estimator=self.estimator, fit_kwargs=fit_kwargs
        )

        # Binner used to group in-sample residuals for probabilistic predictions.
        self.binner_kwargs = (
            binner_kwargs
            if binner_kwargs is not None
            else {"n_bins": 10, "method": "linear"}
        )
        self.binner = QuantileBinner(**self.binner_kwargs)
        self.binner_intervals_ = None

        self.__spotforecast_tags__ = {
            "library": "spotforecast",
            "forecaster_name": "ForecasterRecursive",
            "forecaster_task": "regression",
            "forecasting_scope": "single-series",  # single-series | global
            "forecasting_strategy": "recursive",  # recursive | direct | deep_learning
            "index_types_supported": ["pandas.RangeIndex", "pandas.DatetimeIndex"],
            "requires_index_frequency": True,
            "allowed_input_types_series": ["pandas.Series"],
            "supports_exog": True,
            "allowed_input_types_exog": ["pandas.Series", "pandas.DataFrame"],
            "handles_missing_values_series": False,
            "handles_missing_values_exog": True,
            "supports_lags": True,
            "supports_window_features": True,
            "supports_transformer_series": True,
            "supports_transformer_exog": True,
            "supports_weight_func": True,
            "supports_differentiation": True,
            "prediction_types": [
                "point",
                "interval",
                "bootstrapping",
                "quantiles",
                "distribution",
            ],
            "supports_probabilistic": True,
            "probabilistic_methods": ["bootstrapping", "conformal"],
            "handles_binned_residuals": True,
        }

    def __repr__(self) -> str:
        """
        Build the plain-text report shown when the forecaster is printed.

        Returns:
            str: Multi-line summary of the forecaster configuration and,
            when fitted, of its training metadata.

        Examples:
            >>> from sklearn.linear_model import LinearRegression
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
            >>> print(forecaster)  # doctest: +ELLIPSIS
            =========================
            ForecasterRecursive
            =========================
            Estimator: LinearRegression
            Lags: [1, 2, 3]
            Window features: []
            Window size: 3
            Series name: None
            Exogenous included: False
            Exogenous names: None
            Transformer for y: None
            Transformer for exog: None
            Weight function included: False
            Differentiation order: None
            Training range: None
            Training index type: None
            Training index frequency: None
            Estimator parameters: {...}
            fit_kwargs: {...}
            Creation date: ...
            Last fit date: None
            spotforecast version: ...
            Python version: ...
            Forecaster id: None

        """

        cls_name = type(self).__name__
        separator = "=" * len(cls_name)

        estimator_params = (
            self.estimator.get_params() if hasattr(self.estimator, "get_params") else {}
        )
        exog_names = self.exog_names_in_ if self.exog_in_ else None

        # Training metadata is only meaningful after fitting.
        if self.is_fitted:
            training_range = self.training_range_.to_list()
            index_type = str(self.index_type_).split(".")[-1][:-2]
            index_freq = self.index_freq_
        else:
            training_range = None
            index_type = None
            index_freq = None

        rows = [
            separator,
            cls_name,
            separator,
            f"Estimator: {type(self.estimator).__name__}",
            f"Lags: {self.lags}",
            f"Window features: {self.window_features_names}",
            f"Window size: {self.window_size}",
            f"Series name: {self.series_name_in_}",
            f"Exogenous included: {self.exog_in_}",
            f"Exogenous names: {exog_names}",
            f"Transformer for y: {self.transformer_y}",
            f"Transformer for exog: {self.transformer_exog}",
            f"Weight function included: {self.weight_func is not None}",
            f"Differentiation order: {self.differentiation}",
            f"Training range: {training_range}",
            f"Training index type: {index_type}",
            f"Training index frequency: {index_freq}",
            f"Estimator parameters: {estimator_params}",
            f"fit_kwargs: {self.fit_kwargs}",
            f"Creation date: {self.creation_date}",
            f"Last fit date: {self.fit_date}",
            f"spotforecast version: {self.spotforecast_version}",
            f"Python version: {self.python_version}",
            f"Forecaster id: {self.forecaster_id}",
        ]

        # Every row of the original report ends with a space then a newline.
        return "".join(f"{row} \n" for row in rows)

    def _repr_html_(self) -> str:
        """
        HTML representation of the object (used by Jupyter/IPython rich display).
        The "General Information" section is expanded by default.

        Returns:
            HTML string representation of the forecaster.

        Examples:
            >>> from sklearn.linear_model import LinearRegression
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
            >>> forecaster._repr_html_()  # doctest: +ELLIPSIS
            '<div class="container-...">...</div>'
        """

        # Estimator parameters shown verbatim; empty dict when the estimator
        # does not follow the scikit-learn get_params() convention.
        params = (
            self.estimator.get_params() if hasattr(self.estimator, "get_params") else {}
        )
        # Exogenous names are only reported once the forecaster was fitted with exog.
        exog_names_in_ = self.exog_names_in_ if self.exog_in_ else None

        # Shared CSS and a unique id so multiple forecasters rendered in the
        # same notebook do not clash.
        style, unique_id = get_style_repr_html(self.is_fitted)

        content = f"""
        <div class="container-{unique_id}">
            <p style="font-size: 1.5em; font-weight: bold; margin-block-start: 0.83em; margin-block-end: 0.83em;">{type(self).__name__}</p>
            <details open>
                <summary>General Information</summary>
                <ul>
                    <li><strong>Estimator:</strong> {type(self.estimator).__name__}</li>
                    <li><strong>Lags:</strong> {self.lags}</li>
                    <li><strong>Window features:</strong> {self.window_features_names}</li>
                    <li><strong>Window size:</strong> {self.window_size}</li>
                    <li><strong>Series name:</strong> {self.series_name_in_}</li>
                    <li><strong>Exogenous included:</strong> {self.exog_in_}</li>
                    <li><strong>Weight function included:</strong> {self.weight_func is not None}</li>
                    <li><strong>Differentiation order:</strong> {self.differentiation}</li>
                    <li><strong>Creation date:</strong> {self.creation_date}</li>
                    <li><strong>Last fit date:</strong> {self.fit_date}</li>
                    <li><strong>spotforecast version:</strong> {self.spotforecast_version}</li>
                    <li><strong>Python version:</strong> {self.python_version}</li>
                    <li><strong>Forecaster id:</strong> {self.forecaster_id}</li>
                </ul>
            </details>
            <details>
                <summary>Exogenous Variables</summary>
                <ul>
                    {exog_names_in_}
                </ul>
            </details>
            <details>
                <summary>Data Transformations</summary>
                <ul>
                    <li><strong>Transformer for y:</strong> {self.transformer_y}</li>
                    <li><strong>Transformer for exog:</strong> {self.transformer_exog}</li>
                </ul>
            </details>
            <details>
                <summary>Training Information</summary>
                <ul>
                    <li><strong>Training range:</strong> {self.training_range_.to_list() if self.is_fitted else 'Not fitted'}</li>
                    <li><strong>Training index type:</strong> {str(self.index_type_).split('.')[-1][:-2] if self.is_fitted else 'Not fitted'}</li>
                    <li><strong>Training index frequency:</strong> {self.index_freq_ if self.is_fitted else 'Not fitted'}</li>
                </ul>
            </details>
            <details>
                <summary>Estimator Parameters</summary>
                <ul>
                    {params}
                </ul>
            </details>
            <details>
                <summary>Fit Kwargs</summary>
                <ul>
                    {self.fit_kwargs}
                </ul>
            </details>
        </div>
        """

        return style + content

    def __setstate__(self, state: dict) -> None:
        """
        Restore instance state when unpickling, with backward compatibility.

        Objects pickled before `__spotforecast_tags__` existed get the
        current default tag set; objects that already carry (possibly
        customized) tags are left untouched.
        """
        super().__setstate__(state)

        # Tags already present (possibly customized by the user): keep them.
        if hasattr(self, "__spotforecast_tags__"):
            return

        default_tags = {
            "library": "spotforecast",
            "forecaster_name": "ForecasterRecursive",
            "forecaster_task": "regression",
            "forecasting_scope": "single-series",
            "forecasting_strategy": "recursive",
            "index_types_supported": ["pandas.RangeIndex", "pandas.DatetimeIndex"],
            "requires_index_frequency": True,
            "allowed_input_types_series": ["pandas.Series"],
            "supports_exog": True,
            "allowed_input_types_exog": ["pandas.Series", "pandas.DataFrame"],
            "handles_missing_values_series": False,
            "handles_missing_values_exog": True,
            "supports_lags": True,
            "supports_window_features": True,
            "supports_transformer_series": True,
            "supports_transformer_exog": True,
            "supports_weight_func": True,
            "supports_differentiation": True,
            "prediction_types": [
                "point",
                "interval",
                "bootstrapping",
                "quantiles",
                "distribution",
            ],
            "supports_probabilistic": True,
            "probabilistic_methods": ["bootstrapping", "conformal"],
            "handles_binned_residuals": True,
        }
        self.__spotforecast_tags__ = default_tags

    def _create_lags(
        self,
        y: np.ndarray,
        X_as_pandas: bool = False,
        train_index: Optional[pd.Index] = None,
    ) -> Tuple[Optional[Union[np.ndarray, pd.DataFrame]], np.ndarray]:
        """
        Build the lagged predictor matrix and the aligned target vector.

        Args:
            y: Target values used to build lag features. Expected shape is
                (n_samples,) or (n_samples, 1).
            X_as_pandas: If True, the lag matrix is returned as a pandas
                DataFrame with columns `self.lags_names`.
            train_index: Index assigned to the DataFrame when `X_as_pandas`
                is True.

        Returns:
            Tuple containing:
                - X_data: Lag matrix of shape (n_rows, n_lags), or None when
                  the forecaster has no lags configured.
                - y_data: Target values aligned with the rows of the lag
                  matrix, shape (n_rows,).

        Raises:
            ValueError: If `X_as_pandas` is True but `train_index` is missing.
            ValueError: If `y` is too short for the configured window size.

        Examples:
            >>> import numpy as np
            >>> import pandas as pd
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> forecaster = ForecasterRecursive(lags=3)
            >>> y = np.arange(10)
            >>> train_index = pd.RangeIndex(start=3, stop=10)
            >>> X_data, y_data = forecaster._create_lags(y=y, X_as_pandas=True, train_index=train_index)
            >>> isinstance(X_data, pd.DataFrame)
            True
            >>> X_data.shape
            (7, 3)
            >>> y_data.shape
            (7,)
        """
        if X_as_pandas and train_index is None:
            raise ValueError(
                "If `X_as_pandas` is True, `train_index` must be provided."
            )

        n_obs = len(y)
        if n_obs <= self.window_size:
            raise ValueError(
                f"Length of `y` must be greater than the maximum window size "
                f"needed by the forecaster.\n"
                f"    Length `y`: {n_obs}.\n"
                f"    Max window size: {self.window_size}."
            )

        X_data = None
        if self.lags is not None:
            # Every contiguous window of length `window_size`; the last window
            # is dropped because it has no subsequent target observation.
            windows = np.lib.stride_tricks.sliding_window_view(
                y, self.window_size
            )[:-1]
            # Within a window, column `window_size - k` holds the value lagged
            # by k steps relative to the target.
            X_data = windows[:, self.window_size - self.lags]

            if X_as_pandas:
                X_data = pd.DataFrame(
                    data=X_data, columns=self.lags_names, index=train_index
                )

        # Targets start after the first full window.
        return X_data, y[self.window_size :]

    def _create_window_features(
        self,
        y: pd.Series,
        train_index: pd.Index,
        X_as_pandas: bool = False,
    ) -> Tuple[List[Union[np.ndarray, pd.DataFrame]], List[str]]:
        """
        Compute window features from the target series, one matrix per
        configured window-feature transformer.

        Args:
            y: Target series from which the window features are computed.
            train_index: Index of the training rows; each feature matrix is
                trimmed and validated against it.
            X_as_pandas: If True, each feature matrix stays a pandas
                DataFrame; otherwise it is converted to a NumPy array.

        Returns:
            Tuple containing:
                - List of feature matrices, one per window-feature object.
                - List of all generated feature (column) names.

        Raises:
            TypeError: If a transformer's `transform_batch` does not return a
                pandas DataFrame.
            ValueError: If a returned DataFrame has a different number of rows
                than `train_index`, or an index that does not match it.

        Examples:
            >>> import numpy as np
            >>> import pandas as pd
            >>> from sklearn.linear_model import LinearRegression
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> from spotforecast2_safe.preprocessing import RollingFeatures
            >>> y = pd.Series(np.arange(30), name='y')
            >>> forecaster = ForecasterRecursive(
            ...     estimator=LinearRegression(),
            ...     window_features=[RollingFeatures(stats='mean', window_sizes=3)]
            ... )
            >>> train_index = y.index[3:]  # Assuming window_size is 3
            >>> X_train_window_features, feature_names = forecaster._create_window_features(
            ...     y=y,
            ...     train_index=train_index,
            ...     X_as_pandas=True
            ... )
            >>> isinstance(X_train_window_features[0], pd.DataFrame)
            True
            >>> X_train_window_features[0].shape[0] == len(train_index)
            True
            >>> (X_train_window_features[0].index == train_index).all()
            True

        """

        n_rows = len(train_index)
        feature_matrices: List[Union[np.ndarray, pd.DataFrame]] = []
        feature_names: List[str] = []

        for window_feature in self.window_features:
            wf_name = type(window_feature).__name__
            transformed = window_feature.transform_batch(y)

            if not isinstance(transformed, pd.DataFrame):
                raise TypeError(
                    f"The method `transform_batch` of {wf_name} "
                    f"must return a pandas DataFrame."
                )

            # Keep only the rows that correspond to the training index.
            transformed = transformed.iloc[-n_rows:]
            if len(transformed) != n_rows:
                raise ValueError(
                    f"The method `transform_batch` of {wf_name} "
                    f"must return a DataFrame with the same number of rows as "
                    f"the input time series - `window_size`: {n_rows}."
                )
            if not (transformed.index == train_index).all():
                raise ValueError(
                    f"The method `transform_batch` of {wf_name} "
                    f"must return a DataFrame with the same index as "
                    f"the input time series - `window_size`."
                )

            feature_names.extend(transformed.columns)
            feature_matrices.append(
                transformed if X_as_pandas else transformed.to_numpy()
            )

        return feature_matrices, feature_names

    def _create_train_X_y(
        self, y: pd.Series, exog: Union[pd.Series, pd.DataFrame, None] = None
    ) -> Tuple[
        pd.DataFrame,
        pd.Series,
        List[str],
        List[str],
        List[str],
        List[str],
        Dict[str, type],
        Dict[str, type],
    ]:
        """Create training predictors and target values.

        Args:
            y: Target series for training. Must be a pandas Series.
            exog:
                Optional exogenous variables for training. Can be a pandas Series or DataFrame.
                Must have the same index as `y` and cover the same time range.

        Returns:
            Tuple containing:
                - X_train: DataFrame of training predictors including lags, window features, and exogenous variables (if provided).
                - y_train: Series of target values aligned with the predictors.
                - X_train_features_names_out_: List of all predictor feature names.
                - lags_names: List of lag feature names.
                - window_features_names: List of window feature names.
                - exog_names_in_: List of exogenous variable names (if exogenous variables are used).
                - exog_dtypes_in_: Dictionary of input data types for exogenous variables.
                - exog_dtypes_out_: Dictionary of output data types for exogenous variables after transformation (if exogenous variables are used).

        Raises:
            ValueError: If the length of `y` is not sufficient to create the specified lags and window features.
            ValueError: If `exog` is provided but does not have the same index as `y` or does not cover the same time range.
            ValueError: If `exog` is provided but contains data types that are not supported after transformation.

        Examples:
            >>> import numpy as np
            >>> import pandas as pd
            >>> from sklearn.linear_model import LinearRegression
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> from spotforecast2_safe.preprocessing import RollingFeatures
            >>> y = pd.Series(np.arange(30), name='y')
            >>> exog = pd.DataFrame({'temp': np.random.randn(30)}, index=y.index)
            >>> forecaster = ForecasterRecursive(
            ...     estimator=LinearRegression(),
            ...     lags=3,
            ...     window_features=[RollingFeatures(stats='mean', window_sizes=3)]
            ... )
            >>> (X_train, y_train, exog_names_in_, window_features_names,
            ...  exog_names_out, feature_names, exog_dtypes_in_,
            ...  exog_dtypes_out_) = forecaster._create_train_X_y(y=y, exog=exog)
            >>> isinstance(X_train, pd.DataFrame)
            True
            >>> isinstance(y_train, pd.Series)
            True
            >>> feature_names == forecaster.lags_names + window_features_names + exog_names_out
            True
        """
        check_y(y=y)
        y = input_to_frame(data=y, input_name="y")

        if len(y) <= self.window_size:
            raise ValueError(
                f"Length of `y` must be greater than the maximum window size "
                f"needed by the forecaster.\n"
                f"    Length `y`: {len(y)}.\n"
                f"    Max window size: {self.window_size}.\n"
                f"    Lags window size: {self.max_lag}.\n"
                f"    Window features window size: {self.max_size_window_features}."
            )

        fit_transformer = False if self.is_fitted else True
        y = transform_dataframe(
            df=y,
            transformer=self.transformer_y,
            fit=fit_transformer,
            inverse_transform=False,
        )
        y_values, y_index = check_extract_values_and_index(data=y, data_label="`y`")
        if y_values.ndim == 2 and y_values.shape[1] == 1:
            y_values = y_values.ravel()
        train_index = y_index[self.window_size :]

        if self.differentiation is not None:
            if not self.is_fitted:
                y_values = self.differentiator.fit_transform(y_values)
            else:
                differentiator = copy(self.differentiator)
                y_values = differentiator.fit_transform(y_values)

        exog_names_in_ = None
        exog_dtypes_in_ = None
        exog_dtypes_out_ = None
        X_as_pandas = False
        if exog is not None:
            check_exog(exog=exog, allow_nan=True)
            exog = input_to_frame(data=exog, input_name="exog")
            _, exog_index = check_extract_values_and_index(
                data=exog, data_label="`exog`", ignore_freq=True, return_values=False
            )

            len_y_original = len(y)
            len_train = len(train_index)
            len_exog = len(exog)

            if not len_exog == len_y_original and not len_exog == len_train:
                raise ValueError(
                    f"Length mismatch for exogenous variables. Expected either:\n"
                    f"  - Full length matching `y`: {len_y_original} observations, OR\n"
                    f"  - Pre-aligned length: {len_train} observations (y length - window_size)\n"
                    f"Got: {len_exog} observations.\n"
                    f"Window size: {self.window_size}"
                )

            if len_exog == len_y_original:
                if not (exog_index == y_index).all():
                    raise ValueError(
                        "When `exog` has the same length as `y`, the index of "
                        "`exog` must be aligned with the index of `y` "
                        "to ensure the correct alignment of values."
                    )
                # Standard case: exog covers full y range, trim by window_size
                exog = exog.iloc[self.window_size :, :]
            else:
                if not (exog_index == train_index).all():
                    raise ValueError(
                        "When `exog` already starts after the first `window_size` "
                        "observations, its index must be aligned with the index "
                        "of `y` starting from `window_size`."
                    )

            exog_names_in_ = exog.columns.to_list()
            exog_dtypes_in_ = get_exog_dtypes(exog=exog)

            exog = transform_dataframe(
                df=exog,
                transformer=self.transformer_exog,
                fit=fit_transformer,
                inverse_transform=False,
            )

            check_exog_dtypes(exog, call_check_exog=True)
            exog_dtypes_out_ = get_exog_dtypes(exog=exog)
            X_as_pandas = any(
                not pd.api.types.is_numeric_dtype(dtype)
                or pd.api.types.is_bool_dtype(dtype)
                for dtype in set(exog.dtypes)
            )

        X_train = []
        X_train_features_names_out_ = []

        # Create lags
        # Note: y_values might have NaNs from differentiation.
        # TODO: check if _create_lags handles this!
        X_train_lags, y_train = self._create_lags(
            y=y_values, X_as_pandas=X_as_pandas, train_index=train_index
        )
        if X_train_lags is not None:
            X_train.append(X_train_lags)
            X_train_features_names_out_.extend(self.lags_names)

        X_train_window_features_names_out_ = None
        if self.window_features is not None:
            n_diff = 0 if self.differentiation is None else self.differentiation
            if isinstance(y_values, pd.Series):
                y_vals_for_wf = y_values.iloc[n_diff:]
                y_index_for_wf = y_index[n_diff:]
            else:
                y_vals_for_wf = y_values[n_diff:]
                y_index_for_wf = y_index[n_diff:]

            y_window_features = pd.Series(y_vals_for_wf, index=y_index_for_wf)
            X_train_window_features, X_train_window_features_names_out_ = (
                self._create_window_features(
                    y=y_window_features,
                    X_as_pandas=X_as_pandas,
                    train_index=train_index,
                )
            )
            X_train.extend(X_train_window_features)
            X_train_features_names_out_.extend(X_train_window_features_names_out_)

        X_train_exog_names_out_ = None
        if exog is not None:
            X_train_exog_names_out_ = exog.columns.to_list()
            if not X_as_pandas:
                exog = exog.to_numpy()
            X_train_features_names_out_.extend(X_train_exog_names_out_)
            X_train.append(exog)

        if len(X_train) == 1:
            X_train = X_train[0]
        else:
            if X_as_pandas:
                X_train = pd.concat(X_train, axis=1)
            else:
                X_train = np.concatenate(X_train, axis=1)

        if X_as_pandas:
            X_train.index = train_index
        else:
            X_train = pd.DataFrame(
                data=X_train, index=train_index, columns=X_train_features_names_out_
            )

        y_train = pd.Series(data=y_train, index=train_index, name="y")

        return (
            X_train,
            y_train,
            exog_names_in_,
            X_train_window_features_names_out_,
            X_train_exog_names_out_,
            X_train_features_names_out_,
            exog_dtypes_in_,
            exog_dtypes_out_,
        )

    def create_train_X_y(
        self, y: pd.Series, exog: Union[pd.Series, pd.DataFrame, None] = None
    ) -> Tuple[pd.DataFrame, pd.Series]:
        """Create training predictors and target values.

        Thin public wrapper around the internal `_create_train_X_y` method:
        it runs the full feature-engineering pipeline (lags, window features,
        exogenous variables, transformations) and exposes only the predictor
        matrix and the aligned target vector, discarding the internal
        metadata the private method also returns.

        Args:
            y: Target series for training. Must be a pandas Series.
            exog: Optional exogenous variables for training. Can be a pandas
                Series or DataFrame. Must have the same index as `y` and cover
                the same time range. Defaults to None.

        Returns:
            Tuple containing:
                - X_train: DataFrame of training predictors including lags,
                  window features, and exogenous variables (if provided).
                - y_train: Series of target values aligned with the predictors.

        Examples:
            >>> import numpy as np
            >>> import pandas as pd
            >>> from sklearn.linear_model import LinearRegression
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> y = pd.Series(np.arange(30), name='y')
            >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
            >>> X_train, y_train = forecaster.create_train_X_y(y=y)
            >>> isinstance(X_train, pd.DataFrame) and isinstance(y_train, pd.Series)
            True
        """
        # Keep only the first two elements; the rest is internal metadata.
        X_train, y_train, *_ = self._create_train_X_y(y=y, exog=exog)
        return X_train, y_train

    def _train_test_split_one_step_ahead(
        self,
        y: pd.Series,
        initial_train_size: int,
        exog: Union[pd.Series, pd.DataFrame, None] = None,
    ) -> Tuple[pd.DataFrame, pd.Series, pd.DataFrame, pd.Series]:
        """
        Create matrices needed to train and test the forecaster for one-step-ahead
        predictions.

        Args:
            y: Training time series.
            initial_train_size: Initial size of the training set. It is the number of
                observations used to train the forecaster before making the first
                prediction.
            exog: Exogenous variable/s included as predictor/s. Must have the same
                number of observations as y and their indexes must be aligned.
                Defaults to None.

        Returns:
            Tuple containing:
                - X_train: Predictor values used to train the model as pandas DataFrame.
                - y_train: Target values related to each row of X_train as pandas Series.
                - X_test: Predictor values used to test the model as pandas DataFrame.
                - y_test: Target values related to each row of X_test as pandas Series.

        Examples:
            >>> import numpy as np
            >>> import pandas as pd
            >>> from sklearn.linear_model import LinearRegression
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> from spotforecast2_safe.preprocessing import RollingFeatures
            >>> y = pd.Series(np.arange(30), name='y')
            >>> exog = pd.DataFrame({'temp': np.random.randn(30)}, index=y.index)
            >>> forecaster = ForecasterRecursive(
            ...     estimator=LinearRegression(),
            ...     lags=3,
            ...     window_features=[RollingFeatures(stats='mean', window_sizes=3)]
            ... )
            >>> X_train, y_train, X_test, y_test = forecaster._train_test_split_one_step_ahead(y=y, initial_train_size=20, exog=exog)
            >>> isinstance(X_train, pd.DataFrame)
            True
            >>> isinstance(y_train, pd.Series)
            True
            >>> isinstance(X_test, pd.DataFrame)
            True
            >>> isinstance(y_test, pd.Series)
            True
        """

        is_fitted = self.is_fitted
        self.is_fitted = False
        X_train, y_train, *_ = self._create_train_X_y(
            y=y.iloc[:initial_train_size],
            exog=exog.iloc[:initial_train_size] if exog is not None else None,
        )

        test_init = initial_train_size - self.window_size
        self.is_fitted = True
        X_test, y_test, *_ = self._create_train_X_y(
            y=y.iloc[test_init:],
            exog=exog.iloc[test_init:] if exog is not None else None,
        )

        self.is_fitted = is_fitted

        return X_train, y_train, X_test, y_test

    def get_params(self, deep: bool = True) -> Dict[str, object]:
        """
        Get parameters for this forecaster.

        Args:
            deep: If True, also include the parameters of the contained
                estimator (when it exposes `get_params`), each one prefixed
                with ``estimator__``.

        Returns:
            params: Dictionary of parameter names mapped to their values.

        Examples:
            >>> from sklearn.linear_model import LinearRegression
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
            >>> params = forecaster.get_params()
            >>> params["lags"]
            3
        """
        param_names = (
            "estimator",
            "lags",
            "window_features",
            "transformer_y",
            "transformer_exog",
            "weight_func",
            "differentiation",
            "fit_kwargs",
            "binner_kwargs",
            "forecaster_id",
        )
        # Only report attributes that actually exist on this instance.
        params = {
            name: getattr(self, name) for name in param_names if hasattr(self, name)
        }

        if deep:
            estimator = getattr(self, "estimator", None)
            if estimator is not None and hasattr(estimator, "get_params"):
                params.update(
                    {
                        f"estimator__{name}": value
                        for name, value in estimator.get_params(deep=True).items()
                    }
                )

        return params

    def set_params(
        self, params: Dict[str, object] = None, **kwargs: object
    ) -> "ForecasterRecursive":
        """
        Set the parameters of this forecaster.

        Keys containing ``__`` (e.g. ``estimator__alpha``) are routed to the
        named sub-object, using its own ``set_params`` when available and
        plain attribute assignment otherwise. All other keys are set directly
        as attributes of the forecaster. Unknown names are not rejected; they
        are set as plain attributes (relaxed validation).

        Args:
            params: Optional dictionary of parameter names mapped to their new
                values. Merged with `**kwargs`; on a name clash the keyword
                argument wins.
            **kwargs: Dictionary of parameter names mapped to their new values.
                Parameters can be for the forecaster itself or for the
                contained estimator (using the `estimator__` prefix).

        Returns:
            self: The forecaster instance with updated parameters.

        Examples:
            >>> from sklearn.linear_model import LinearRegression
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
            >>> _ = forecaster.set_params(estimator__fit_intercept=False)
            >>> forecaster.estimator.get_params()["fit_intercept"]
            False
        """

        # Merge the optional dict with keyword arguments; kwargs take priority.
        all_params = {**(params or {}), **kwargs}
        if not all_params:
            return self

        # Split direct attributes from nested (``obj__param``) assignments.
        nested_params: Dict[str, Dict[str, object]] = {}
        for key, value in all_params.items():
            if "__" in key:
                obj_name, param_name = key.split("__", 1)
                nested_params.setdefault(obj_name, {})[param_name] = value
            else:
                setattr(self, key, value)

        for obj_name, obj_params in nested_params.items():
            if not hasattr(self, obj_name):
                continue
            obj = getattr(self, obj_name)
            if hasattr(obj, "set_params"):
                obj.set_params(**obj_params)
            else:
                for param_name, value in obj_params.items():
                    setattr(obj, param_name, value)

        return self

    def fit(
        self,
        y: pd.Series,
        exog: Union[pd.Series, pd.DataFrame, None] = None,
        store_last_window: bool = True,
        store_in_sample_residuals: bool = False,
        random_state: int = 123,
        suppress_warnings: bool = False,
    ) -> None:
        """
        Fit the forecaster to the training data.

        Builds the training matrices via `_create_train_X_y`, fits the inner
        estimator (optionally with sample weights from `weight_func`), and
        stores the metadata required at prediction time: index type and
        frequency, exogenous-variable names/dtypes, the last training window
        and, optionally, in-sample residuals for probabilistic prediction.

        Args:
            y: Target series for training. Must be a pandas Series.
            exog: Optional exogenous variables for training. Can be a pandas
                Series or DataFrame. Must have the same index as `y` and cover
                the same time range. Defaults to None.
            store_last_window: Whether to store the last window of the training
                series for use in prediction. Defaults to True.
            store_in_sample_residuals: Whether to store in-sample residuals
                after fitting, which can be used for certain probabilistic
                prediction methods. Defaults to False.
            random_state: Random seed for reproducibility when sampling
                residuals if `store_in_sample_residuals` is True. Defaults
                to 123.
            suppress_warnings: Whether to suppress warnings during fitting,
                such as those related to insufficient data length for lags or
                window features. Defaults to False.

        Returns:
            None

        Examples:
            >>> import numpy as np
            >>> import pandas as pd
            >>> from sklearn.linear_model import LinearRegression
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> from spotforecast2_safe.preprocessing import RollingFeatures
            >>> y = pd.Series(np.arange(30), name='y')
            >>> exog = pd.DataFrame({'temp': np.random.randn(30)}, index=y.index)
            >>> forecaster = ForecasterRecursive(
            ...     estimator=LinearRegression(),
            ...     lags=3,
            ...     window_features=[RollingFeatures(stats='mean', window_sizes=3)]
            ... )
            >>> forecaster.fit(y=y, exog=exog, store_in_sample_residuals=True)
        """

        set_skforecast_warnings(suppress_warnings, action="ignore")

        # Reset values in case the forecaster has already been fitted.
        self.last_window_ = None
        self.index_type_ = None
        self.index_freq_ = None
        self.training_range_ = None
        self.series_name_in_ = None
        self.exog_in_ = False
        self.exog_names_in_ = None
        self.exog_type_in_ = None
        self.exog_dtypes_in_ = None
        self.exog_dtypes_out_ = None
        self.X_train_window_features_names_out_ = None
        self.X_train_exog_names_out_ = None
        self.X_train_features_names_out_ = None
        self.in_sample_residuals_ = None
        self.in_sample_residuals_by_bin_ = None
        self.binner_intervals_ = None
        self.is_fitted = False
        self.fit_date = None

        # Build the training matrices and capture feature/exog metadata.
        (
            X_train,
            y_train,
            exog_names_in_,
            X_train_window_features_names_out_,
            X_train_exog_names_out_,
            X_train_features_names_out_,
            exog_dtypes_in_,
            exog_dtypes_out_,
        ) = self._create_train_X_y(y=y, exog=exog)

        sample_weight = self.create_sample_weights(X_train=X_train)

        # Only pass `sample_weight` when weights were produced — presumably
        # some estimators do not accept the keyword at all; TODO confirm.
        if sample_weight is not None:
            self.estimator.fit(
                X=X_train,
                y=y_train,
                sample_weight=sample_weight,
                **self.fit_kwargs,
            )
        else:
            self.estimator.fit(X=X_train, y=y_train, **self.fit_kwargs)

        self.X_train_window_features_names_out_ = X_train_window_features_names_out_
        self.X_train_features_names_out_ = X_train_features_names_out_

        # Record fit metadata used later for input validation at predict time.
        self.is_fitted = True
        self.fit_date = pd.Timestamp.today().strftime("%Y-%m-%d %H:%M:%S")
        self.training_range_ = y.index[[0, -1]]
        self.index_type_ = type(y.index)
        if isinstance(y.index, pd.DatetimeIndex):
            self.index_freq_ = y.index.freqstr
        else:
            # Non-datetime indexes (e.g. RangeIndex) expose `step` instead of
            # a frequency string; fall back to None when neither is available.
            try:
                self.index_freq_ = y.index.step
            except AttributeError:
                self.index_freq_ = None

        if exog is not None:
            self.exog_in_ = True
            self.exog_type_in_ = type(exog)
            self.exog_names_in_ = exog_names_in_
            self.exog_dtypes_in_ = exog_dtypes_in_
            self.exog_dtypes_out_ = exog_dtypes_out_
            self.X_train_exog_names_out_ = X_train_exog_names_out_

        self.series_name_in_ = y.name if y.name is not None else "y"

        # NOTE: This is done to save time during fit in functions such as backtesting()
        if self._probabilistic_mode is not False:
            self._binning_in_sample_residuals(
                y_true=y_train.to_numpy(),
                y_pred=self.estimator.predict(X_train).ravel(),
                store_in_sample_residuals=store_in_sample_residuals,
                random_state=random_state,
            )

        if store_last_window:
            # Store the last `window_size` raw (untransformed) observations of
            # `y`; prediction re-applies the transformations to this window.
            self.last_window_ = (
                y.iloc[-self.window_size :]
                .copy()
                .to_frame(name=y.name if y.name is not None else "y")
            )

        set_skforecast_warnings(suppress_warnings, action="default")

    def create_sample_weights(
        self, X_train: pd.DataFrame
    ) -> Union[np.ndarray, None]:
        """
        Create weights for each observation according to the forecaster's attribute
        `weight_func`.

        Args:
            X_train: Dataframe created with the `create_train_X_y` method, first return.

        Returns:
            Weights to use in `fit` method, or None if no `weight_func` is
            defined on the forecaster.

        Raises:
            ValueError: If the computed weights contain NaN values, contain
                negative values, or sum to zero (and so cannot be normalized).
        """

        sample_weight = None

        if self.weight_func is not None:
            # The weight function receives the training index, so weights can
            # depend on time (e.g. down-weighting older observations).
            sample_weight = self.weight_func(X_train.index)

        if sample_weight is not None:
            if np.isnan(sample_weight).any():
                raise ValueError(
                    "The resulting `sample_weight` cannot have NaN values."
                )
            if np.any(sample_weight < 0):
                raise ValueError(
                    "The resulting `sample_weight` cannot have negative values."
                )
            if np.sum(sample_weight) == 0:
                raise ValueError(
                    "The resulting `sample_weight` cannot be normalized because "
                    "the sum of the weights is zero."
                )

        return sample_weight

    def _create_predict_inputs(
        self,
        steps: int | str | pd.Timestamp,
        last_window: Union[pd.Series, pd.DataFrame, None] = None,
        exog: Union[pd.Series, pd.DataFrame, None] = None,
        predict_probabilistic: bool = False,
        use_in_sample_residuals: bool = True,
        use_binned_residuals: bool = True,
        check_inputs: bool = True,
    ) -> Tuple[np.ndarray, Union[np.ndarray, None], pd.Index, int]:
        """
        Create the inputs needed for the first iteration of the prediction
        process. As this is a recursive process, the last window is updated at
        each iteration of the prediction process.

        Args:
            steps: Number of steps to predict.
                - If steps is int, number of steps to predict.
                - If str or pandas Datetime, the prediction will be up to that date.
            last_window: Series values used to create the predictors (lags) needed in the
                first iteration of the prediction (t + 1).
                If `last_window = None`, the values stored in `self.last_window_` are
                used to calculate the initial predictors, and the predictions start
                right after training data.
            exog: Exogenous variable/s included as predictor/s.
            predict_probabilistic: If `True`, the necessary checks for probabilistic predictions will be
                performed.
            use_in_sample_residuals: If `True`, residuals from the training data are used as proxy of
                prediction error to create predictions.
                If `False`, out of sample residuals (calibration) are used.
                Out-of-sample residuals must be precomputed using Forecaster's
                `set_out_sample_residuals()` method.
            use_binned_residuals: If `True`, residuals are selected based on the predicted values
                (binned selection).
                If `False`, residuals are selected randomly.
            check_inputs: If `True`, the input is checked for possible warnings and errors
                with the `check_predict_input` function. This argument is created
                for internal use and is not recommended to be changed.

        Returns:
            - last_window_values:
                Numpy array of the last window values to use for prediction,
                transformed and ready for input into the prediction method.
            - exog_values:
                Numpy array of exogenous variable values for prediction,
                transformed and ready for input into the prediction method,
                or None if no exogenous variables are used.
            - prediction_index:
                Pandas Index for the predicted values, constructed based on the
                last window index and the number of steps to predict.
            - steps:
                Number of future steps predicted.
        """

        # Fall back to the window stored at the end of fit().
        if last_window is None:
            last_window = self.last_window_

        if self.is_fitted:
            # Resolve a date-like `steps` (str / Timestamp) into an integer
            # number of steps relative to the end of `last_window`.
            steps = date_to_index_position(
                index=last_window.index,
                date_input=steps,
                method="prediction",
                date_literal="steps",
            )

        if check_inputs:
            check_predict_input(
                forecaster_name=type(self).__name__,
                steps=steps,
                is_fitted=self.is_fitted,
                exog_in_=self.exog_in_,
                index_type_=self.index_type_,
                index_freq_=self.index_freq_,
                window_size=self.window_size,
                last_window=last_window,
                last_window_exog=None,
                exog=exog,
                exog_names_in_=self.exog_names_in_,
                interval=None,
            )

            if predict_probabilistic:
                # Probabilistic prediction additionally requires residuals
                # (in-sample or out-of-sample, optionally binned) to exist.
                check_residuals_input(
                    forecaster_name=type(self).__name__,
                    use_in_sample_residuals=use_in_sample_residuals,
                    in_sample_residuals_=self.in_sample_residuals_,
                    out_sample_residuals_=self.out_sample_residuals_,
                    use_binned_residuals=use_binned_residuals,
                    in_sample_residuals_by_bin_=self.in_sample_residuals_by_bin_,
                    out_sample_residuals_by_bin_=self.out_sample_residuals_by_bin_,
                )

        # Keep only the last `window_size` observations and apply the same `y`
        # transformation used during training (fit=False: no refitting).
        last_window_values = (
            last_window.iloc[-self.window_size :].to_numpy(copy=True).ravel()
        )
        last_window_values = transform_numpy(
            array=last_window_values,
            transformer=self.transformer_y,
            fit=False,
            inverse_transform=False,
        )
        if self.differentiation is not None:
            # NOTE(review): `fit_transform` re-fits the differentiator on this
            # window rather than reusing the training-time state — confirm
            # this is intended.
            last_window_values = self.differentiator.fit_transform(last_window_values)

        if exog is not None:
            exog = input_to_frame(data=exog, input_name="exog")
            # Reorder columns to match the column order seen during training.
            if exog.columns.tolist() != self.exog_names_in_:
                exog = exog[self.exog_names_in_]

            exog = transform_dataframe(
                df=exog,
                transformer=self.transformer_exog,
                fit=False,
                inverse_transform=False,
            )

            # If dtypes differ from the training output, run the full dtype
            # validation; otherwise only the cheaper NaN check is needed.
            if not exog.dtypes.to_dict() == self.exog_dtypes_out_:
                check_exog_dtypes(exog=exog)
            else:
                check_exog(exog=exog, allow_nan=False)

            # Only the first `steps` rows of exog are consumed by prediction.
            exog_values = exog.to_numpy()[:steps]
        else:
            exog_values = None

        # Future index continuing right after the last window.
        prediction_index = expand_index(index=last_window.index, steps=steps)

        if self.transformer_y is not None or self.differentiation is not None:
            warnings.warn(
                "The output matrix is in the transformed scale due to the "
                "inclusion of transformations or differentiation in the Forecaster. "
                "As a result, any predictions generated using this matrix will also "
                "be in the transformed scale. Please refer to the documentation "
                "for more details: "
                "https://skforecast.org/latest/user_guides/training-and-prediction-matrices.html",
                DataTransformationWarning,
            )

        return last_window_values, exog_values, prediction_index, steps

    def _recursive_predict(
        self,
        steps: int,
        last_window_values: np.ndarray,
        exog_values: np.ndarray | None = None,
    ) -> np.ndarray:
        """
        Predict n steps ahead. It is an iterative process in which, each prediction,
        is used as a predictor for the next step.

        Args:
            steps:
                Number of steps to predict.
            last_window_values:
                Series values used to create the predictors needed in the first
                iteration of the prediction (t + 1).
            exog_values:
                Exogenous variable/s included as predictor/s.

        Returns:
            Predicted values.
        """

        # Run the per-step predictions on CPU; the original device is
        # restored before returning.
        original_device = set_cpu_gpu_device(estimator=self.estimator, device="cpu")

        n_lags = len(self.lags) if self.lags is not None else 0
        n_window_features = (
            len(self.X_train_window_features_names_out_)
            if self.window_features is not None
            else 0
        )
        n_exog = exog_values.shape[1] if exog_values is not None else 0

        # Single pre-allocated 1-D feature row, reused (overwritten) each step.
        X = np.full(
            shape=(n_lags + n_window_features + n_exog), fill_value=np.nan, dtype=float
        )
        predictions = np.full(shape=steps, fill_value=np.nan, dtype=float)
        # `last_window` = observed values followed by `steps` NaN slots that
        # are filled in as each prediction is produced.
        last_window = np.concatenate((last_window_values, predictions))

        estimator_name = type(self.estimator).__name__
        is_linear = isinstance(self.estimator, LinearModel)
        is_lightgbm = estimator_name == "LGBMRegressor"
        is_xgboost = estimator_name == "XGBRegressor"

        # Fast paths: for linear models use the coefficients directly, and for
        # LightGBM/XGBoost call the underlying booster instead of the
        # sklearn-style `predict` wrapper.
        if is_linear:
            coef = self.estimator.coef_
            intercept = self.estimator.intercept_
        elif is_lightgbm:
            booster = self.estimator.booster_
        elif is_xgboost:
            booster = self.estimator.get_booster()

        has_lags = self.lags is not None
        has_window_features = self.window_features is not None
        has_exog = exog_values is not None

        for i in range(steps):

            if has_lags:
                # `self.lags` holds positive lag offsets. At step i there are
                # `steps - i` unfilled slots at the end of `last_window`, so
                # `-self.lags - (steps - i)` indexes the values lying `lag`
                # positions before the point being predicted.
                X[:n_lags] = last_window[-self.lags - (steps - i)]
            if has_window_features:
                # Slice excludes the still-unfilled future slots; starting at
                # `i` keeps the window length constant across steps.
                window_data = last_window[i : -(steps - i)]
                X[n_lags : n_lags + n_window_features] = np.concatenate(
                    [wf.transform(window_data) for wf in self.window_features]
                )
            if has_exog:
                X[n_lags + n_window_features :] = exog_values[i]

            if is_linear:
                pred = np.dot(X, coef) + intercept
            elif is_lightgbm:
                pred = booster.predict(X.reshape(1, -1))
            elif is_xgboost:
                pred = booster.inplace_predict(X.reshape(1, -1))
            else:
                pred = self.estimator.predict(X.reshape(1, -1)).ravel()

            pred = pred.item()
            predictions[i] = pred

            # Write the new prediction into its reserved slot in
            # `last_window` so later steps can use it as a lag / window value.
            last_window[-(steps - i)] = pred

        set_cpu_gpu_device(estimator=self.estimator, device=original_device)

        return predictions

    def _recursive_predict_bootstrapping(
        self,
        steps: int,
        last_window_values: np.ndarray,
        sampled_residuals: np.ndarray,
        use_binned_residuals: bool,
        n_boot: int,
        exog_values: np.ndarray | None = None,
    ) -> np.ndarray:
        """
        Vectorized bootstrap prediction - predict all n_boot iterations per step.
        Instead of running n_boot sequential predictions, this method predicts
        all bootstrap samples at once per step, significantly reducing overhead.

        Args:
            steps:
                Number of steps to predict.
            last_window_values:
                Series values used to create the predictors needed in the first
                iteration of the prediction (t + 1).
            sampled_residuals:
                Pre-sampled residuals for all bootstrap iterations.
                - If `use_binned_residuals=True`: 3D array of shape (n_bins, steps, n_boot)
                - If `use_binned_residuals=False`: 2D array of shape (steps, n_boot)
            use_binned_residuals:
                If `True`, residuals are selected based on the predicted values.
                If `False`, residuals are selected randomly.
            n_boot:
                Number of bootstrap iterations.
            exog_values:
                Exogenous variable/s included as predictor/s. Defaults to None.

        Returns:
            Numpy ndarray with the predicted values. Shape (steps, n_boot).

        Raises:
            ValueError:
                If `sampled_residuals` does not match the expected shape/dimensions.
            IndexError:
                If `last_window_values` or `exog_values` are not of expected lengths.

        Examples:
            >>> import numpy as np
            >>> from sklearn.linear_model import LinearRegression
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=2)
            >>> _ = forecaster.fit(y=pd.Series(np.arange(10)))
            >>> last_window = np.array([8, 9])
            >>> residuals = np.random.normal(size=(3, 5)) # 3 steps, 5 boots
            >>> preds = forecaster._recursive_predict_bootstrapping(
            ...     steps=3,
            ...     last_window_values=last_window,
            ...     sampled_residuals=residuals,
            ...     use_binned_residuals=False,
            ...     n_boot=5
            ... )
            >>> preds.shape
            (3, 5)
        """

        # Run the step-by-step loop on CPU; the original device is restored
        # just before returning.
        original_device = set_cpu_gpu_device(estimator=self.estimator, device="cpu")

        # Number of predictor columns contributed by lags, window features
        # and exogenous variables, respectively.
        n_lags = len(self.lags) if self.lags is not None else 0
        n_window_features = (
            len(self.X_train_window_features_names_out_)
            if self.window_features is not None
            else 0
        )
        n_exog = exog_values.shape[1] if exog_values is not None else 0
        n_features = n_lags + n_window_features + n_exog

        # Input matrix for prediction: shape (n_boot, n_features)
        X = np.full((n_boot, n_features), fill_value=np.nan, dtype=float)

        # Output predictions: shape (steps, n_boot)
        predictions = np.full((steps, n_boot), fill_value=np.nan, dtype=float)

        # Expand last_window to 2D: (window_size + steps, n_boot).
        # Each column represents a separate bootstrap trajectory; the trailing
        # `steps` rows start as NaN and are filled in one per iteration.
        last_window = np.tile(last_window_values[:, np.newaxis], (1, n_boot))
        last_window = np.vstack([last_window, np.full((steps, n_boot), np.nan)])

        # Fast prediction paths: linear models are reduced to a direct dot
        # product, and LightGBM / XGBoost are called through their native
        # boosters rather than the sklearn wrapper.
        estimator_name = type(self.estimator).__name__
        is_linear = isinstance(self.estimator, LinearModel)
        is_lightgbm = estimator_name == "LGBMRegressor"
        is_xgboost = estimator_name == "XGBRegressor"

        if is_linear:
            coef = self.estimator.coef_
            intercept = self.estimator.intercept_
        elif is_lightgbm:
            booster = self.estimator.booster_
        elif is_xgboost:
            booster = self.estimator.get_booster()

        has_lags = self.lags is not None
        has_window_features = self.window_features is not None
        has_exog = exog_values is not None

        for i in range(steps):

            if has_lags:
                # Lag `lag` for step i sits `lag` rows before the slot that
                # step i's prediction will be written to (row -(steps - i)).
                for j, lag in enumerate(self.lags):
                    X[:, j] = last_window[-(lag + steps - i), :]

            if has_window_features:
                # NOTE(review): this slice starts at row 0, so the window grows
                # by one row per step, whereas the single-trajectory
                # `_recursive_predict` slides a fixed-length window
                # (`last_window[i : -(steps - i)]`). Confirm the transforms
                # only consume the trailing window so both paths agree.
                window_data = last_window[: -(steps - i), :]
                # transform accepts 2D: (window_length, n_boot) -> (n_boot, n_stats)
                # and concatenate along axis=1: (n_boot, total_window_features)
                X[:, n_lags : n_lags + n_window_features] = np.concatenate(
                    [wf.transform(window_data) for wf in self.window_features], axis=1
                )

            if has_exog:
                # Same exogenous values for every bootstrap trajectory at step i.
                X[:, n_lags + n_window_features :] = exog_values[i]

            if is_linear:
                pred = np.dot(X, coef) + intercept
            elif is_lightgbm:
                pred = booster.predict(X)
            elif is_xgboost:
                pred = booster.inplace_predict(X)
            else:
                pred = self.estimator.predict(X).ravel()

            if use_binned_residuals:
                # sampled_residuals is a 3D array: (n_bins, steps, n_boot).
                # Each trajectory draws its residual from the bin that its own
                # prediction falls into.
                boot_indices = np.arange(n_boot)
                pred_bins = self.binner.transform(pred).astype(int)
                pred += sampled_residuals[pred_bins, i, boot_indices]
            else:
                pred += sampled_residuals[i, :]

            # Store the perturbed predictions and feed them back as inputs
            # for the next iteration.
            predictions[i, :] = pred
            last_window[-(steps - i), :] = pred

        set_cpu_gpu_device(estimator=self.estimator, device=original_device)

        return predictions

    def create_predict_X(
        self,
        steps: int,
        last_window: pd.Series | pd.DataFrame | None = None,
        exog: pd.Series | pd.DataFrame | None = None,
        check_inputs: bool = True,
    ) -> pd.DataFrame:
        """
        Create the predictors needed to predict `steps` ahead. As it is a recursive
        process, the predictors are created at each iteration of the prediction
        process.

        Args:
            steps:
                Number of steps to predict.
                - If steps is int, number of steps to predict.
                - If str or pandas Datetime, the prediction will be up to that date.
            last_window:
                Series values used to create the predictors (lags) needed in the
                first iteration of the prediction (t + 1).
                If `last_window = None`, the values stored in `self.last_window_` are
                used to calculate the initial predictors, and the predictions start
                right after training data. Defaults to None.
            exog:
                Exogenous variable/s included as predictor/s. Defaults to None.
            check_inputs:
                If `True`, the input is checked for possible warnings and errors
                with the `check_predict_input` function. This argument is created
                for internal use and is not recommended to be changed.
                Defaults to True.

        Returns:
            Pandas DataFrame with the predictors for each step. The index
            is the same as the prediction index.
        """

        (
            last_window_values,
            exog_values,
            prediction_index,
            steps,
        ) = self._create_predict_inputs(
            steps=steps,
            last_window=last_window,
            exog=exog,
            check_inputs=check_inputs,
        )

        # Silence only sklearn's feature-name warning raised when predicting
        # on raw numpy arrays.
        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore",
                message="X does not have valid feature names",
                category=UserWarning,
            )
            predictions = self._recursive_predict(
                steps=steps,
                last_window_values=last_window_values,
                exog_values=exog_values,
            )

        X_predict = []
        # Observed history followed by the recursive predictions: the last
        # `steps` positions hold the predicted values.
        full_predictors = np.concatenate((last_window_values, predictions))

        if self.lags is not None:
            # For each predicted step (rows) gather its lagged values (columns).
            # `idx` holds negative offsets; adding len(full_predictors) converts
            # them into valid non-negative indices.
            idx = np.arange(-steps, 0)[:, None] - self.lags
            X_lags = full_predictors[idx + len(full_predictors)]
            X_predict.append(X_lags)

        if self.window_features is not None:
            X_window_features = np.full(
                shape=(steps, len(self.X_train_window_features_names_out_)),
                fill_value=np.nan,
                order="C",
                dtype=float,
            )
            # Recompute the window features for each step from the fixed-length
            # sliding window that ends just before that step's prediction.
            for i in range(steps):
                X_window_features[i, :] = np.concatenate(
                    [
                        wf.transform(full_predictors[i : -(steps - i)])
                        for wf in self.window_features
                    ]
                )
            X_predict.append(X_window_features)

        if exog is not None:
            X_predict.append(exog_values)

        X_predict = pd.DataFrame(
            data=np.concatenate(X_predict, axis=1),
            columns=self.X_train_features_names_out_,
            index=prediction_index,
        )

        if self.exog_in_:
            # A dtype counts as categorical-like here when it is non-numeric or
            # boolean; in that case restore the training-time exog dtypes that
            # were lost in the float concatenation above.
            categorical_features = any(
                not pd.api.types.is_numeric_dtype(dtype)
                or pd.api.types.is_bool_dtype(dtype)
                for dtype in set(self.exog_dtypes_out_.values())
            )
            if categorical_features:
                X_predict = X_predict.astype(self.exog_dtypes_out_)

        if self.transformer_y is not None or self.differentiation is not None:
            warnings.warn(
                "The output matrix is in the transformed scale due to the "
                "inclusion of transformations or differentiation in the Forecaster. "
                "As a result, any predictions generated using this matrix will also "
                "be in the transformed scale. Please refer to the documentation "
                "for more details: "
                "https://skforecast.org/latest/user_guides/training-and-prediction-matrices.html",
                DataTransformationWarning,
            )

        return X_predict

    def predict(
        self,
        steps: int | str | pd.Timestamp,
        last_window: Union[pd.Series, pd.DataFrame, None] = None,
        exog: Union[pd.Series, pd.DataFrame, None] = None,
        check_inputs: bool = True,
    ) -> pd.Series:
        """
        Forecast future values of the target series by recursive one-step
        prediction: each predicted value is fed back as a lag for the next step.

        Args:
            steps:
                Number of future steps to predict, or a date (str / pandas
                Timestamp) up to which to predict.
            last_window:
                Observed values used to build the predictors for the first step.
                If None, the window stored during training (`self.last_window_`)
                is used and predictions start right after the training data.
                Defaults to None.
            exog:
                Exogenous variable/s included as predictor/s, with the same
                structure as the exogenous data used at fit time. Defaults to None.
            check_inputs:
                Whether to run input validation checks. Defaults to True.

        Returns:
            Pandas Series named 'pred' with one value per predicted step,
            indexed by the prediction index derived from the last window.
        """

        # Resolve raw numpy inputs and the output index from the user arguments.
        window_vals, exog_vals, pred_index, steps = self._create_predict_inputs(
            steps=steps,
            last_window=last_window,
            exog=exog,
            check_inputs=check_inputs,
        )

        # Silence only sklearn's feature-name warning triggered by predicting
        # on plain numpy arrays.
        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore",
                message="X does not have valid feature names",
                category=UserWarning,
            )
            preds = self._recursive_predict(
                steps=steps,
                last_window_values=window_vals,
                exog_values=exog_vals,
            )

        # Undo differencing first (if any), then invert the target
        # transformation so predictions come back in the original scale.
        if self.differentiation is not None:
            preds = self.differentiator.inverse_transform_next_window(preds)

        preds = transform_numpy(
            array=preds,
            transformer=self.transformer_y,
            fit=False,
            inverse_transform=True,
        )

        return pd.Series(data=preds, index=pred_index, name="pred")

    def predict_bootstrapping(
        self,
        steps: int | str | pd.Timestamp,
        last_window: pd.Series | pd.DataFrame | None = None,
        exog: pd.Series | pd.DataFrame | None = None,
        n_boot: int = 250,
        use_in_sample_residuals: bool = True,
        use_binned_residuals: bool = True,
        random_state: int = 123,
    ) -> pd.DataFrame:
        """
        Generate multiple forecasting predictions using a bootstrapping process.
        By sampling from a collection of past observed errors (the residuals),
        each iteration of bootstrapping generates a different set of predictions.
        See the References section for more information.

        Args:
            steps:
                Number of steps to predict.
                - If steps is int, number of steps to predict.
                - If str or pandas Datetime, the prediction will be up to that date.
            last_window:
                Series values used to create the predictors (lags) needed in the
                first iteration of the prediction (t + 1).
                If `last_window = None`, the values stored in `self.last_window_` are
                used to calculate the initial predictors, and the predictions start
                right after training data. Defaults to None.
            exog:
                Exogenous variable/s included as predictor/s. Defaults to None.
            n_boot:
                Number of bootstrapping iterations to perform when estimating prediction
                intervals. Defaults to 250.
            use_in_sample_residuals:
                If `True`, residuals from the training data are used as proxy of
                prediction error to create predictions.
                If `False`, out of sample residuals (calibration) are used.
                Out-of-sample residuals must be precomputed using Forecaster's
                `set_out_sample_residuals()` method. Defaults to True.
            use_binned_residuals:
                If `True`, residuals are selected based on the predicted values
                (binned selection).
                If `False`, residuals are selected randomly. Defaults to True.
            random_state:
                Seed for the random number generator to ensure reproducibility. Defaults to 123.

        Returns:
            Pandas DataFrame with predictions generated by bootstrapping. Shape: (steps, n_boot).

        Raises:
            ValueError:
                If `steps` is not an integer or a valid date.
            ValueError:
                If `exog` is missing or has invalid shape.
            ValueError:
                If `n_boot` is not a positive integer.
            ValueError:
                If `use_in_sample_residuals=True` and `in_sample_residuals_` are not available.
            ValueError:
                If `use_in_sample_residuals=False` and `out_sample_residuals_` are not available.

        Examples:
            >>> import numpy as np
            >>> import pandas as pd
            >>> from sklearn.linear_model import LinearRegression
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> rng = np.random.default_rng(123)
            >>> y = pd.Series(rng.normal(size=100), name='y')
            >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
            >>> _ = forecaster.fit(y=y)
            >>> boot_preds = forecaster.predict_bootstrapping(steps=3, n_boot=5)
            >>> boot_preds.shape
            (3, 5)

        References:
            .. [1] Forecasting: Principles and Practice (3rd ed) Rob J Hyndman and George Athanasopoulos.
                   https://otexts.com/fpp3/prediction-intervals.html
        """

        (
            last_window_values,
            exog_values,
            prediction_index,
            steps,
        ) = self._create_predict_inputs(
            steps=steps,
            last_window=last_window,
            exog=exog,
            predict_probabilistic=True,
            use_in_sample_residuals=use_in_sample_residuals,
            use_binned_residuals=use_binned_residuals,
            check_inputs=True,
        )

        # Residual pool: training residuals or precomputed out-of-sample
        # (calibration) residuals.
        if use_in_sample_residuals:
            residuals = self.in_sample_residuals_
            residuals_by_bin = self.in_sample_residuals_by_bin_
        else:
            residuals = self.out_sample_residuals_
            residuals_by_bin = self.out_sample_residuals_by_bin_

        rng = np.random.default_rng(seed=random_state)
        if use_binned_residuals:
            # Create 3D array with sampled residuals: (n_bins, steps, n_boot).
            # One integers() draw per bin, in bin order, so the sampling is
            # reproducible for a fixed random_state.
            n_bins = len(residuals_by_bin)
            sampled_residuals = np.stack(
                [
                    residuals_by_bin[k][
                        rng.integers(
                            low=0, high=len(residuals_by_bin[k]), size=(steps, n_boot)
                        )
                    ]
                    for k in range(n_bins)
                ],
                axis=0,
            )
        else:
            # 2D array (steps, n_boot): residuals drawn uniformly with replacement.
            sampled_residuals = residuals[
                rng.integers(low=0, high=len(residuals), size=(steps, n_boot))
            ]

        # Silence only sklearn's feature-name warning raised when predicting
        # on raw numpy arrays.
        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore",
                message="X does not have valid feature names",
                category=UserWarning,
            )
            boot_predictions = self._recursive_predict_bootstrapping(
                steps=steps,
                last_window_values=last_window_values,
                exog_values=exog_values,
                sampled_residuals=sampled_residuals,
                use_binned_residuals=use_binned_residuals,
                n_boot=n_boot,
            )

        # Undo differencing (if any), then invert the target transformation so
        # the bootstrap trajectories are returned in the original scale.
        if self.differentiation is not None:
            boot_predictions = self.differentiator.inverse_transform_next_window(
                boot_predictions
            )

        if self.transformer_y:
            boot_predictions = transform_numpy(
                array=boot_predictions,
                transformer=self.transformer_y,
                fit=False,
                inverse_transform=True,
            )

        boot_columns = [f"pred_boot_{i}" for i in range(n_boot)]
        boot_predictions = pd.DataFrame(
            data=boot_predictions, index=prediction_index, columns=boot_columns
        )

        return boot_predictions

    def predict_quantiles(
        self,
        steps: int | str | pd.Timestamp,
        last_window: pd.Series | pd.DataFrame | None = None,
        exog: pd.Series | pd.DataFrame | None = None,
        quantiles: list[float] | tuple[float, ...] = (0.05, 0.5, 0.95),
        n_boot: int = 250,
        use_in_sample_residuals: bool = True,
        use_binned_residuals: bool = True,
        random_state: int = 123,
    ) -> pd.DataFrame:
        """
        Calculate the specified quantiles for each step. After generating
        multiple forecasting predictions through a bootstrapping process, each
        quantile is calculated for each step.

        Args:
            steps:
                Number of steps to predict.
                - If steps is int, number of steps to predict.
                - If str or pandas Datetime, the prediction will be up to that date.
            last_window:
                Series values used to create the predictors (lags) needed in the
                first iteration of the prediction (t + 1).
                If `last_window = None`, the values stored in `self.last_window_` are
                used to calculate the initial predictors, and the predictions start
                right after training data. Defaults to None.
            exog:
                Exogenous variable/s included as predictor/s. Defaults to None.
            quantiles:
                Sequence of quantiles to compute, which must be between 0 and 1
                inclusive. For example, quantiles of 0.05, 0.5 and 0.95 should be as
                `quantiles = [0.05, 0.5, 0.95]`. Defaults to (0.05, 0.5, 0.95).
            n_boot:
                Number of bootstrapping iterations to perform when estimating
                quantiles. Defaults to 250.
            use_in_sample_residuals:
                If `True`, residuals from the training data are used as proxy of
                prediction error to create predictions.
                If `False`, out of sample residuals (calibration) are used.
                Out-of-sample residuals must be precomputed using Forecaster's
                `set_out_sample_residuals()` method. Defaults to True.
            use_binned_residuals:
                If `True`, residuals are selected based on the predicted values
                (binned selection).
                If `False`, residuals are selected randomly. Defaults to True.
            random_state:
                Seed for the random number generator to ensure reproducibility.
                Defaults to 123.

        Returns:
            Pandas DataFrame with one column per quantile (named `q_{q}`) and
            one row per predicted step.
        """

        check_interval(quantiles=quantiles)

        boot_predictions = self.predict_bootstrapping(
            steps=steps,
            last_window=last_window,
            exog=exog,
            n_boot=n_boot,
            random_state=random_state,
            use_in_sample_residuals=use_in_sample_residuals,
            use_binned_residuals=use_binned_residuals,
        )

        # Quantiles across the bootstrap columns (axis=1); transpose so rows
        # are steps and columns are quantiles. `list()` normalizes the default
        # tuple (an immutable default avoids the shared-mutable-default pitfall
        # of the previous `[0.05, 0.5, 0.95]` default) for pandas.
        predictions = boot_predictions.quantile(q=list(quantiles), axis=1).transpose()
        predictions.columns = [f"q_{q}" for q in quantiles]

        return predictions

    def predict_dist(
        self,
        steps: int | str | pd.Timestamp,
        distribution: object,
        last_window: pd.Series | pd.DataFrame | None = None,
        exog: pd.Series | pd.DataFrame | None = None,
        n_boot: int = 250,
        use_in_sample_residuals: bool = True,
        use_binned_residuals: bool = True,
        random_state: int = 123,
    ) -> pd.DataFrame:
        """
        Fit a given probability distribution for each step. After generating
        multiple forecasting predictions through a bootstrapping process, each
        step is fitted to the given distribution.

        Args:
            steps: Number of steps to predict.
                - If steps is int, number of steps to predict.
                - If str or pandas Datetime, the prediction will be up to that date.
            distribution: A distribution object from scipy.stats with methods `_pdf` and `fit`.
                For example scipy.stats.norm.
            last_window: Series values used to create the predictors (lags) needed in the
                first iteration of the prediction (t + 1).
                If `last_window = None`, the values stored in` self.last_window_` are
                used to calculate the initial predictors, and the predictions start
                right after training data.
            exog: Exogenous variable/s included as predictor/s.
            n_boot: Number of bootstrapping iterations to perform when estimating prediction
                intervals.
            use_in_sample_residuals: If `True`, residuals from the training data are used as proxy of
                prediction error to create predictions.
                If `False`, out of sample residuals (calibration) are used.
                Out-of-sample residuals must be precomputed using Forecaster's
                `set_out_sample_residuals()` method.
            use_binned_residuals: If `True`, residuals are selected based on the predicted values
                (binned selection).
                If `False`, residuals are selected randomly.
            random_state: Seed for the random number generator to ensure reproducibility.

        Returns:
            Distribution parameters estimated for each step.
        """

        if not hasattr(distribution, "_pdf") or not callable(
            getattr(distribution, "fit", None)
        ):
            raise TypeError(
                "`distribution` must be a valid probability distribution object "
                "from scipy.stats, with methods `_pdf` and `fit`."
            )

        predictions = self.predict_bootstrapping(
            steps=steps,
            last_window=last_window,
            exog=exog,
            n_boot=n_boot,
            random_state=random_state,
            use_in_sample_residuals=use_in_sample_residuals,
            use_binned_residuals=use_binned_residuals,
        )

        param_names = [
            p for p in inspect.signature(distribution._pdf).parameters if not p == "x"
        ] + ["loc", "scale"]

        predictions[param_names] = predictions.apply(
            lambda x: distribution.fit(x), axis=1, result_type="expand"
        )
        predictions = predictions[param_names]

        return predictions

    def _predict_interval_conformal(
        self,
        steps: int | str | pd.Timestamp,
        last_window: pd.Series | pd.DataFrame | None = None,
        exog: pd.Series | pd.DataFrame | None = None,
        nominal_coverage: float = 0.95,
        use_in_sample_residuals: bool = True,
        use_binned_residuals: bool = True,
    ) -> pd.DataFrame:
        """
        Generate prediction intervals using the conformal prediction
        split method [1]_.

        Args:
            steps:
                Number of steps to predict.
                - If steps is int, number of steps to predict.
                - If str or pandas Datetime, the prediction will be up to that date.
            last_window:
                Series values used to create the predictors (lags) needed in the
                first iteration of the prediction (t + 1).
                If `last_window = None`, the values stored in` self.last_window_` are
                used to calculate the initial predictors, and the predictions start
                right after training data. Defaults to None.
            exog:
                Exogenous variable/s included as predictor/s. Defaults to None.
            nominal_coverage:
                Nominal coverage, also known as expected coverage, of the prediction
                intervals. Must be between 0 and 1. Defaults to 0.95.
            use_in_sample_residuals:
                If `True`, residuals from the training data are used as proxy of
                prediction error to create predictions.
                If `False`, out of sample residuals (calibration) are used.
                Out-of-sample residuals must be precomputed using Forecaster's
                `set_out_sample_residuals()` method. Defaults to True.
            use_binned_residuals:
                If `True`, residuals are selected based on the predicted values
                (binned selection).
                If `False`, residuals are selected randomly. Defaults to True.

        Returns:
            Pandas DataFrame with values predicted by the forecaster and their estimated interval.
            - pred: predictions.
            - lower_bound: lower bound of the interval.
            - upper_bound: upper bound of the interval.

        Raises:
            ValueError:
                If `nominal_coverage` is not between 0 and 1.
            ValueError:
                If inputs are invalid (checked by `_create_predict_inputs`).

        Examples:
            >>> # Internal method, typically used via predict_interval(method='conformal')
            >>> import numpy as np
            >>> import pandas as pd
            >>> from sklearn.linear_model import LinearRegression
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> rng = np.random.default_rng(123)
            >>> y = pd.Series(rng.normal(size=100), name='y')
            >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
            >>> _ = forecaster.fit(y=y)
            >>> preds = forecaster._predict_interval_conformal(steps=3, nominal_coverage=0.9)
            >>> preds.columns.tolist()
            ['pred', 'lower_bound', 'upper_bound']

        References:
            .. [1] MAPIE - Model Agnostic Prediction Interval Estimator.
                   https://mapie.readthedocs.io/en/stable/theoretical_description_regression.html#the-split-method
        """

        last_window_values, exog_values, prediction_index, steps = (
            self._create_predict_inputs(
                steps=steps,
                last_window=last_window,
                exog=exog,
                predict_probabilistic=True,
                use_in_sample_residuals=use_in_sample_residuals,
                use_binned_residuals=use_binned_residuals,
                check_inputs=True,
            )
        )

        # Residual pool: training residuals or precomputed out-of-sample
        # (calibration) residuals.
        if use_in_sample_residuals:
            residuals = self.in_sample_residuals_
            residuals_by_bin = self.in_sample_residuals_by_bin_
        else:
            residuals = self.out_sample_residuals_
            residuals_by_bin = self.out_sample_residuals_by_bin_

        # Silence only sklearn's feature-name warning raised when predicting
        # on raw numpy arrays.
        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore",
                message="X does not have valid feature names",
                category=UserWarning,
            )
            predictions = self._recursive_predict(
                steps=steps,
                last_window_values=last_window_values,
                exog_values=exog_values,
            )

        # Conformal correction factor = nominal-coverage quantile of the
        # absolute residuals (split-conformal score).
        if use_binned_residuals:
            # Global factor used as fallback for bins without residuals.
            # NOTE(review): if `residuals` itself is empty the fallback is NaN,
            # which propagates into the interval bounds — confirm upstream
            # guarantees a non-empty residual pool.
            if len(residuals) > 0:
                global_cf = np.quantile(np.abs(residuals), nominal_coverage)
            else:
                global_cf = np.nan

            correction_factor_by_bin = {}
            for k, v in residuals_by_bin.items():
                if len(v) > 0:
                    correction_factor_by_bin[k] = np.quantile(
                        np.abs(v), nominal_coverage
                    )
                else:
                    correction_factor_by_bin[k] = global_cf

            # Map each predicted value's bin to its correction factor;
            # bins not present in the mapping fall back to the global factor.
            replace_func = np.vectorize(
                lambda x: correction_factor_by_bin.get(x, global_cf)
            )

            predictions_bin = self.binner.transform(predictions)
            correction_factor = replace_func(predictions_bin)
        else:
            correction_factor = np.quantile(np.abs(residuals), nominal_coverage)

        # Symmetric interval around the point predictions.
        lower_bound = predictions - correction_factor
        upper_bound = predictions + correction_factor
        predictions = np.column_stack([predictions, lower_bound, upper_bound])

        # Undo differencing (if any), then invert the target transformation so
        # predictions and bounds are returned in the original scale.
        if self.differentiation is not None:
            predictions = self.differentiator.inverse_transform_next_window(predictions)

        if self.transformer_y:
            predictions = transform_numpy(
                array=predictions,
                transformer=self.transformer_y,
                fit=False,
                inverse_transform=True,
            )

        predictions = pd.DataFrame(
            data=predictions,
            index=prediction_index,
            columns=["pred", "lower_bound", "upper_bound"],
        )

        return predictions

    def predict_interval(
        self,
        steps: int | str | pd.Timestamp,
        last_window: pd.Series | pd.DataFrame | None = None,
        exog: pd.Series | pd.DataFrame | None = None,
        method: str = "bootstrapping",
        interval: float | list[float] | tuple[float] = [5, 95],
        n_boot: int = 250,
        use_in_sample_residuals: bool = True,
        use_binned_residuals: bool = True,
        random_state: int = 123,
    ) -> pd.DataFrame:
        """
        Predict n steps ahead and estimate prediction intervals using either
        bootstrapping or conformal prediction methods. Refer to the References
        section for additional details on these methods.

        Args:
            steps:
                Number of steps to predict.
                - If steps is int, number of steps to predict.
                - If str or pandas Datetime, the prediction will be up to that date.
            last_window:
                Series values used to create the predictors (lags) needed in the
                first iteration of the prediction (t + 1).
                If `last_window = None`, the values stored in `self.last_window_` are
                used to calculate the initial predictors, and the predictions start
                right after training data. Defaults to None.
            exog:
                Exogenous variable/s included as predictor/s. Defaults to None.
            method:
                Technique used to estimate prediction intervals. Available options:
                - 'bootstrapping': Bootstrapping is used to generate prediction
                  intervals [1]_.
                - 'conformal': Employs the conformal prediction split method for
                  interval estimation [2]_.
                Defaults to 'bootstrapping'.
            interval:
                Confidence level of the prediction interval. Interpretation depends
                on the method used:
                - If `float`, represents the nominal (expected) coverage (between 0
                  and 1). For instance, `interval=0.95` corresponds to `[2.5, 97.5]`
                  percentiles.
                - If `list` or `tuple`, defines the exact percentiles to compute, which
                  must be between 0 and 100 inclusive. For example, interval
                  of 95% should be as `interval = [2.5, 97.5]`.
                - When using `method='conformal'`, the interval must be a float or
                  a list/tuple defining a symmetric interval.
                Defaults to [5, 95].
            n_boot:
                Number of bootstrapping iterations to perform when estimating prediction
                intervals. Defaults to 250.
            use_in_sample_residuals:
                If `True`, residuals from the training data are used as proxy of
                prediction error to create predictions.
                If `False`, out of sample residuals (calibration) are used.
                Out-of-sample residuals must be precomputed using Forecaster's
                `set_out_sample_residuals()` method. Defaults to True.
            use_binned_residuals:
                If `True`, residuals are selected based on the predicted values
                (binned selection).
                If `False`, residuals are selected randomly. Defaults to True.
            random_state:
                Seed for the random number generator to ensure reproducibility. Defaults to 123.

        Returns:
            Pandas DataFrame with values predicted by the forecaster and their estimated interval.
            - pred: predictions.
            - lower_bound: lower bound of the interval.
            - upper_bound: upper bound of the interval.

        Raises:
            ValueError:
                If `method` is not 'bootstrapping' or 'conformal'.
            ValueError:
                 If `interval` is invalid or not compatible with the chosen method.
            ValueError:
                If inputs (`steps`, `exog`, etc.) are invalid.

        Examples:
            >>> import numpy as np
            >>> import pandas as pd
            >>> from sklearn.linear_model import LinearRegression
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> rng = np.random.default_rng(123)
            >>> y = pd.Series(rng.normal(size=100), name='y')
            >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
            >>> _ = forecaster.fit(y=y)
            >>> # Bootstrapping method
            >>> intervals_boot = forecaster.predict_interval(
            ...     steps=3, method='bootstrapping', interval=[5, 95]
            ... )
            >>> intervals_boot.columns.tolist()
            ['pred', 'lower_bound', 'upper_bound']

            >>> # Conformal method
            >>> intervals_conf = forecaster.predict_interval(
            ...     steps=3, method='conformal', interval=0.95
            ... )
            >>> intervals_conf.columns.tolist()
            ['pred', 'lower_bound', 'upper_bound']

        References:
            .. [1] Forecasting: Principles and Practice (3rd ed) Rob J Hyndman and George Athanasopoulos.
                   https://otexts.com/fpp3/prediction-intervals.html
            .. [2] MAPIE - Model Agnostic Prediction Interval Estimator.
                   https://mapie.readthedocs.io/en/stable/theoretical_description_regression.html#the-split-method
        """

        if method == "bootstrapping":

            if isinstance(interval, (list, tuple)):
                check_interval(interval=interval, ensure_symmetric_intervals=False)
                interval = np.array(interval) / 100
            else:
                check_interval(alpha=interval, alpha_literal="interval")
                interval = np.array([0.5 - interval / 2, 0.5 + interval / 2])

            boot_predictions = self.predict_bootstrapping(
                steps=steps,
                last_window=last_window,
                exog=exog,
                n_boot=n_boot,
                random_state=random_state,
                use_in_sample_residuals=use_in_sample_residuals,
                use_binned_residuals=use_binned_residuals,
            )

            predictions = self.predict(
                steps=steps, last_window=last_window, exog=exog, check_inputs=False
            )

            predictions_interval = boot_predictions.quantile(
                q=interval, axis=1
            ).transpose()
            predictions_interval.columns = ["lower_bound", "upper_bound"]
            predictions = pd.concat((predictions, predictions_interval), axis=1)

        elif method == "conformal":

            if isinstance(interval, (list, tuple)):
                check_interval(interval=interval, ensure_symmetric_intervals=True)
                nominal_coverage = (interval[1] - interval[0]) / 100
            else:
                check_interval(alpha=interval, alpha_literal="interval")
                nominal_coverage = interval

            predictions = self._predict_interval_conformal(
                steps=steps,
                last_window=last_window,
                exog=exog,
                nominal_coverage=nominal_coverage,
                use_in_sample_residuals=use_in_sample_residuals,
                use_binned_residuals=use_binned_residuals,
            )
        else:
            raise ValueError(
                f"Invalid `method` '{method}'. Choose 'bootstrapping' or 'conformal'."
            )

        return predictions

    def _binning_in_sample_residuals(
        self,
        y_true: np.ndarray,
        y_pred: np.ndarray,
        store_in_sample_residuals: bool = False,
        random_state: int = 123,
    ) -> None:
        """
        Fit the quantile binner on the predicted values and, optionally, store
        the training residuals both globally and per bin.

        `y_true` and `y_pred` are assumed to already be differentiated and/or
        transformed according to `self.differentiation` and `self.transformer_y`.
        At most `10_000` residuals are kept overall and at most
        `10_000 // self.binner.n_bins_` per bin.

        Args:
            y_true: True values of the time series.
            y_pred: Predicted values of the time series.
            store_in_sample_residuals: If `True`, residuals are stored in the
                `in_sample_residuals_` and `in_sample_residuals_by_bin_`
                attributes. If `False`, only the bin intervals are stored.
            random_state: Seed for the random generator so that the sampled
                residuals are deterministic.
        """

        errors = y_true - y_pred

        if self._probabilistic_mode == "binned":
            frame = pd.DataFrame({"prediction": y_pred, "residuals": errors})
            self.binner.fit(y_pred)
            self.binner_intervals_ = self.binner.intervals_

        if not store_in_sample_residuals:
            return

        rng = np.random.default_rng(seed=random_state)
        if self._probabilistic_mode == "binned":
            frame["bin"] = self.binner.transform(y_pred).astype(int)
            grouped = frame.groupby("bin")["residuals"].apply(np.array).to_dict()
            self.in_sample_residuals_by_bin_ = grouped

            # Downsample any bin that exceeds its share of the 10_000 budget.
            cap = 10_000 // self.binner.n_bins_
            for bin_id, bin_residuals in grouped.items():
                if len(bin_residuals) > cap:
                    picks = rng.integers(low=0, high=len(bin_residuals), size=cap)
                    grouped[bin_id] = bin_residuals[picks]

        # Downsample the global residual pool to the 10_000 budget.
        if len(errors) > 10_000:
            errors = errors[rng.integers(low=0, high=len(errors), size=10_000)]

        self.in_sample_residuals_ = errors

    def set_fit_kwargs(self, fit_kwargs: dict[str, object]) -> None:
        """
        Update the additional keyword arguments forwarded to the estimator's
        `fit` method.

        Args:
            fit_kwargs: Dict of the form {"argument": new_value}.
        """

        # Validate against the estimator's fit signature before storing.
        validated_kwargs = check_select_fit_kwargs(self.estimator, fit_kwargs=fit_kwargs)
        self.fit_kwargs = validated_kwargs

    def set_lags(
        self, lags: Union[int, List[int], np.ndarray, range, None] = None
    ) -> None:
        """
        Replace the lags used as predictors and refresh the dependent
        attributes `lags_names`, `max_lag` and `window_size`.

        Args:
            lags: Lags used as predictors. Index starts at 1, so lag 1 is equal to t-1.
                - `int`: include lags from 1 to `lags` (included).
                - `list`, `1d numpy ndarray` or `range`: include only lags present in
                `lags`, all elements must be int.
                - `None`: no lags are included as predictors.

        Raises:
            ValueError: If both `lags` and `window_features` would be None.
        """

        if lags is None and self.window_features is None:
            raise ValueError(
                "At least one of the arguments `lags` or `window_features` "
                "must be different from None. This is required to create the "
                "predictors used in training the forecaster."
            )

        self.lags, self.lags_names, self.max_lag = initialize_lags(
            type(self).__name__, lags
        )
        # Window size is driven by whichever predictor needs the longest history.
        size_candidates = (self.max_lag, self.max_size_window_features)
        self.window_size = max(ws for ws in size_candidates if ws is not None)
        if self.differentiation is not None:
            # Differentiation consumes extra leading observations.
            self.window_size += self.differentiation
            self.differentiator.set_params(window_size=self.window_size)

    def set_window_features(
        self, window_features: object | list[object] | None = None
    ) -> None:
        """
        Set new value to the attribute `window_features`.

        Attributes `max_size_window_features`, `window_features_names`,
        `window_features_class_names` and `window_size` are also updated.

        Args:
            window_features: Instance or list of instances used to create window features.
                Window features are created from the original time series and are
                included as predictors.

        Returns:
            None

        Examples:
            >>> from sklearn.linear_model import LinearRegression
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> from spotforecast2_safe.preprocessing import RollingFeatures
            >>> import pandas as pd
            >>> import numpy as np
            >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
            >>> rolling = RollingFeatures(stats=['mean', 'std'], window_sizes=[3, 5])
            >>> forecaster.set_window_features(window_features=rolling)
            >>> forecaster.window_features_names
            ['roll_mean_3', 'roll_std_3', 'roll_mean_5', 'roll_std_5']
            >>> forecaster.window_size
            5
        """

        if window_features is None and self.lags is None:
            raise ValueError(
                "At least one of the arguments `lags` or `window_features` "
                "must be different from None. This is required to create the "
                "predictors used in training the forecaster."
            )

        (
            self.window_features,
            self.window_features_names,
            self.max_size_window_features,
        ) = initialize_window_features(window_features)
        self.window_features_class_names = None
        if window_features is not None:
            self.window_features_class_names = [
                type(wf).__name__ for wf in self.window_features
            ]
        self.window_size = max(
            [
                ws
                for ws in [self.max_lag, self.max_size_window_features]
                if ws is not None
            ]
        )
        if self.differentiation is not None:
            self.window_size += self.differentiation
            self.differentiator.set_params(window_size=self.window_size)

    def get_feature_importances(
        self, sort_importance: bool = True
    ) -> pd.DataFrame | None:
        """
        Return feature importances of the estimator stored in the forecaster.
        Only valid when the estimator stores internally the feature importances
        in the attribute `feature_importances_` or `coef_`. Otherwise, a warning
        is issued and `None` is returned.

        Args:
            sort_importance: If `True`, sorts the feature importances in descending order.

        Returns:
            pd.DataFrame | None: Feature importances associated with each predictor,
                or `None` if the estimator exposes neither `feature_importances_`
                nor `coef_`.

        Raises:
            NotFittedError: If the forecaster is not fitted.

        Examples:
            >>> from sklearn.linear_model import LinearRegression
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> import pandas as pd
            >>> import numpy as np
            >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
            >>> forecaster.fit(y=pd.Series(np.arange(20)))
            >>> forecaster.get_feature_importances()
              feature  importance
            0   lag_1         1.0
            1   lag_2         0.0
            2   lag_3         0.0
        """

        if not self.is_fitted:
            raise NotFittedError(
                "This forecaster is not fitted yet. Call `fit` with appropriate "
                "arguments before using `get_feature_importances()`."
            )

        # For pipelines, the importances live on the final estimator step.
        if isinstance(self.estimator, Pipeline):
            estimator = self.estimator[-1]
        else:
            estimator = self.estimator

        if hasattr(estimator, "feature_importances_"):
            feature_importances = estimator.feature_importances_
        elif hasattr(estimator, "coef_"):
            feature_importances = estimator.coef_
        else:
            warnings.warn(
                f"Impossible to access feature importances for estimator of type "
                f"{type(estimator)}. This method is only valid when the "
                f"estimator stores internally the feature importances in the "
                f"attribute `feature_importances_` or `coef_`.",
                UserWarning,
            )
            return None

        feature_importances = pd.DataFrame(
            {
                "feature": self.X_train_features_names_out_,
                "importance": feature_importances,
            }
        )
        if sort_importance:
            feature_importances = feature_importances.sort_values(
                by="importance", ascending=False
            )

        return feature_importances

    def set_in_sample_residuals(
        self,
        y: pd.Series,
        exog: pd.Series | pd.DataFrame | None = None,
        random_state: int = 123,
    ) -> None:
        """
        Set in-sample residuals in case they were not calculated during the
        training process.

        In-sample residuals are calculated as the difference between the true
        values and the predictions made by the forecaster using the training
        data. The following internal attributes are updated:

        + `in_sample_residuals_`: residuals stored in a numpy ndarray.
        + `binner_intervals_`: intervals used to bin the residuals are calculated
        using the quantiles of the predicted values.
        + `in_sample_residuals_by_bin_`: residuals are binned according to the
        predicted value they are associated with and stored in a dictionary, where
        the keys are the intervals of the predicted values and the values are
        the residuals associated with that range.

        A total of 10_000 residuals are stored in the attribute `in_sample_residuals_`.
        If the number of residuals is greater than 10_000, a random sample of
        10_000 residuals is stored. The number of residuals stored per bin is
        limited to `10_000 // self.binner.n_bins_`.

        Args:
            y: Target time series. Must cover exactly the same index range as
                the series used to train the forecaster.
            exog: Exogenous variables.
            random_state: Random state for reproducibility.

        Returns:
            None

        Raises:
            NotFittedError: If the forecaster is not fitted.
            IndexError: If the index range of `y` does not match the range
                used during training.
            ValueError: If the features generated from the provided data do not
                match those used during the training process.

        Examples:
            >>> import numpy as np
            >>> import pandas as pd
            >>> from sklearn.linear_model import LinearRegression
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
            >>> forecaster.fit(y=pd.Series(np.arange(20)), store_in_sample_residuals=False)
            >>> forecaster.set_in_sample_residuals(y=pd.Series(np.arange(20)))
            >>> forecaster.in_sample_residuals_
            array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
        """
        if not self.is_fitted:
            raise NotFittedError(
                "This forecaster is not fitted yet. Call `fit` with appropriate "
                "arguments before using `set_in_sample_residuals()`."
            )

        check_y(y=y)
        # First and last index values of `y`; must match the training range.
        y_index_range = check_extract_values_and_index(
            data=y, data_label="`y`", return_values=False
        )[1][[0, -1]]

        if not y_index_range.equals(self.training_range_):
            raise IndexError(
                f"The index range of `y` does not match the range "
                f"used during training. Please ensure the index is aligned "
                f"with the training data.\n"
                f"    Expected : {self.training_range_}\n"
                f"    Received : {y_index_range}"
            )

        # Rebuild the training matrices; only the feature names are needed to
        # verify the data reproduces the original training setup.
        (
            X_train,
            y_train,
            _,
            _,
            _,
            X_train_features_names_out_,
            *_,
        ) = self._create_train_X_y(y=y, exog=exog)

        if not X_train_features_names_out_ == self.X_train_features_names_out_:
            raise ValueError(
                f"Feature mismatch detected after matrix creation. The features "
                f"generated from the provided data do not match those used during "
                f"the training process. To correctly set in-sample residuals, "
                f"ensure that the same data and preprocessing steps are applied.\n"
                f"    Expected output : {self.X_train_features_names_out_}\n"
                f"    Current output  : {X_train_features_names_out_}"
            )

        # Predictions on the training matrix yield the in-sample residuals,
        # which are then binned and stored (see _binning_in_sample_residuals).
        self._binning_in_sample_residuals(
            y_true=y_train.to_numpy(),
            y_pred=self.estimator.predict(X_train).ravel(),
            store_in_sample_residuals=True,
            random_state=random_state,
        )

    def set_out_sample_residuals(
        self,
        y_true: np.ndarray | pd.Series,
        y_pred: np.ndarray | pd.Series,
        append: bool = False,
        random_state: int = 123,
    ) -> None:
        """
        Set new values to the attribute `out_sample_residuals_`.

        Out of sample residuals are meant to be calculated using observations that
        did not participate in the training process. `y_true` and `y_pred` are
        expected to be in the original scale of the time series. Residuals are
        calculated as `y_true` - `y_pred`, after applying the necessary
        transformations and differentiations if the forecaster includes them
        (`self.transformer_y` and `self.differentiation`). Two internal attributes
        are updated:

        + `out_sample_residuals_`: residuals stored in a numpy ndarray.
        + `out_sample_residuals_by_bin_`: residuals are binned according to the
        predicted value they are associated with and stored in a dictionary, where
        the keys are the intervals of the predicted values and the values are
        the residuals associated with that range. If a bin is empty, it is filled
        with a random sample of residuals from other bins. This is done to ensure
        that all bins have at least one residual and can be used in the prediction
        process.

        A total of 10_000 residuals are stored in the attribute `out_sample_residuals_`.
        If the number of residuals is greater than 10_000, a random sample of
        10_000 residuals is stored. The number of residuals stored per bin is
        limited to `10_000 // self.binner.n_bins_`.

        Args:
            y_true: True values of the time series in the original scale.
            y_pred: Predicted values of the time series in the original scale.
            append: If `True`, new residuals are added to the once already stored
                in the forecaster. If after appending the new residuals, the limit
                of `10_000 // self.binner.n_bins_` values per bin is reached, a
                random sample of residuals is stored.
            random_state: Random state for reproducibility.

        Returns:
            None

        Raises:
            NotFittedError: If the forecaster is not fitted.
            TypeError: If `y_true` or `y_pred` are not `numpy ndarray` or `pandas Series`.
            ValueError: If `y_true` and `y_pred` have different length or index (if Series).

        Examples:
            >>> from sklearn.linear_model import LinearRegression
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> import pandas as pd
            >>> import numpy as np
            >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
            >>> forecaster.fit(y=pd.Series(np.arange(20)), store_in_sample_residuals=False)
            >>> y_true = np.array([20, 21, 22, 23, 24])
            >>> y_pred = np.array([20.1, 20.9, 22.2, 22.8, 24.0])
            >>> forecaster.set_out_sample_residuals(y_true=y_true, y_pred=y_pred)
            >>> forecaster.out_sample_residuals_
            array([-0.1,  0.1, -0.2,  0.2,  0. ])
        """
        if not self.is_fitted:
            raise NotFittedError(
                "This forecaster is not fitted yet. Call `fit` with appropriate "
                "arguments before using `set_out_sample_residuals()`."
            )

        if not isinstance(y_true, (np.ndarray, pd.Series)):
            raise TypeError(
                f"`y_true` argument must be `numpy ndarray` or `pandas Series`. "
                f"Got {type(y_true)}."
            )

        if not isinstance(y_pred, (np.ndarray, pd.Series)):
            raise TypeError(
                f"`y_pred` argument must be `numpy ndarray` or `pandas Series`. "
                f"Got {type(y_pred)}."
            )

        if len(y_true) != len(y_pred):
            raise ValueError(
                f"`y_true` and `y_pred` must have the same length. "
                f"Got {len(y_true)} and {len(y_pred)}."
            )

        if isinstance(y_true, pd.Series) and isinstance(y_pred, pd.Series):
            if not y_true.index.equals(y_pred.index):
                raise ValueError("`y_true` and `y_pred` must have the same index.")

        if not isinstance(y_pred, np.ndarray):
            y_pred = y_pred.to_numpy()
        if not isinstance(y_true, np.ndarray):
            y_true = y_true.to_numpy()

        if self.transformer_y:
            y_true = transform_numpy(
                array=y_true,
                transformer=self.transformer_y,
                fit=False,
                inverse_transform=False,
            )
            y_pred = transform_numpy(
                array=y_pred,
                transformer=self.transformer_y,
                fit=False,
                inverse_transform=False,
            )

        if self.differentiation is not None:
            differentiator = copy(self.differentiator)
            differentiator.set_params(window_size=None)
            y_true = differentiator.fit_transform(y_true)[self.differentiation :]
            y_pred = differentiator.fit_transform(y_pred)[self.differentiation :]

        data = pd.DataFrame(
            {"prediction": y_pred, "residuals": y_true - y_pred}
        ).dropna()
        y_pred = data["prediction"].to_numpy()
        residuals = data["residuals"].to_numpy()

        if self.binner is not None:
            data["bin"] = self.binner.transform(y_pred).astype(int)
            residuals_by_bin = (
                data.groupby("bin")["residuals"].apply(np.array).to_dict()
            )
        else:
            residuals_by_bin = {}

        out_sample_residuals = (
            np.array([])
            if self.out_sample_residuals_ is None
            else self.out_sample_residuals_
        )
        out_sample_residuals_by_bin = (
            {}
            if self.out_sample_residuals_by_bin_ is None
            else self.out_sample_residuals_by_bin_
        )
        if append:
            out_sample_residuals = np.concatenate([out_sample_residuals, residuals])
            for k, v in residuals_by_bin.items():
                if k in out_sample_residuals_by_bin:
                    out_sample_residuals_by_bin[k] = np.concatenate(
                        (out_sample_residuals_by_bin[k], v)
                    )
                else:
                    out_sample_residuals_by_bin[k] = v
        else:
            out_sample_residuals = residuals
            out_sample_residuals_by_bin = residuals_by_bin

        if self.binner is not None:
            max_samples = 10_000 // self.binner.n_bins
            rng = np.random.default_rng(seed=random_state)

            for k, v in out_sample_residuals_by_bin.items():
                if len(v) > max_samples:
                    out_sample_residuals_by_bin[k] = rng.choice(
                        v, size=max_samples, replace=False
                    )

            bin_keys = (
                [] if self.binner_intervals_ is None else self.binner_intervals_.keys()
            )
            empty_bins = [
                k
                for k in bin_keys
                if k not in out_sample_residuals_by_bin
                or len(out_sample_residuals_by_bin[k]) == 0
            ]

            if empty_bins:
                warnings.warn(
                    f"The following bins have no out of sample residuals: {empty_bins}. "
                    f"No predicted values fall in the interval "
                    f"{[self.binner_intervals_[bin] for bin in empty_bins]}. "
                    f"Empty bins will be filled with a random sample of residuals.",
                    ResidualsUsageWarning,
                )
                empty_bin_size = min(max_samples, len(out_sample_residuals))
                for k in empty_bins:
                    out_sample_residuals_by_bin[k] = rng.choice(
                        a=out_sample_residuals, size=empty_bin_size, replace=False
                    )

        self.out_sample_residuals_ = out_sample_residuals
        self.out_sample_residuals_by_bin_ = out_sample_residuals_by_bin

__repr__()

Information displayed when a ForecasterRecursive object is printed.

Returns:

Name Type Description
str str

String representation of the forecaster with key information about its configuration and state.

Examples:

>>> from sklearn.linear_model import LinearRegression
>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
>>> print(forecaster)
=========================
ForecasterRecursive
=========================
Estimator: LinearRegression
Lags: [1, 2, 3]
Window features: []
Window size: 3
Series name: None
Exogenous included: False
Exogenous names: None
Transformer for y: None
Transformer for exog: None
Weight function included: False
Differentiation order: None
Training range: None
Training index type: None
Training index frequency: None
Estimator parameters: {...}
fit_kwargs: {...}
Creation date: ...
Last fit date: None
spotforecast version: ...
Python version: ...
Forecaster id: None
Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def __repr__(self) -> str:
    """
    Return a human-readable, multi-line summary of the forecaster.

    The summary includes the estimator, lag/window configuration,
    transformers, differentiation, training metadata and versioning
    information, and is what `print(forecaster)` displays.

    Returns:
        str: String representation of the forecaster with key information
            about its configuration and state.
    """

    cls_name = type(self).__name__
    banner = "=" * len(cls_name)
    estimator_params = (
        self.estimator.get_params() if hasattr(self.estimator, "get_params") else {}
    )
    # Exogenous names are only meaningful when exog was used in training.
    exog_names = self.exog_names_in_ if self.exog_in_ else None
    training_range = self.training_range_.to_list() if self.is_fitted else None
    # e.g. "<class 'pandas...RangeIndex'>" -> "RangeIndex".
    index_type = (
        str(self.index_type_).split(".")[-1][:-2] if self.is_fitted else None
    )
    index_freq = self.index_freq_ if self.is_fitted else None

    rows = [
        f"{banner} ",
        f"{cls_name} ",
        f"{banner} ",
        f"Estimator: {type(self.estimator).__name__} ",
        f"Lags: {self.lags} ",
        f"Window features: {self.window_features_names} ",
        f"Window size: {self.window_size} ",
        f"Series name: {self.series_name_in_} ",
        f"Exogenous included: {self.exog_in_} ",
        f"Exogenous names: {exog_names} ",
        f"Transformer for y: {self.transformer_y} ",
        f"Transformer for exog: {self.transformer_exog} ",
        f"Weight function included: {self.weight_func is not None} ",
        f"Differentiation order: {self.differentiation} ",
        f"Training range: {training_range} ",
        f"Training index type: {index_type} ",
        f"Training index frequency: {index_freq} ",
        f"Estimator parameters: {estimator_params} ",
        f"fit_kwargs: {self.fit_kwargs} ",
        f"Creation date: {self.creation_date} ",
        f"Last fit date: {self.fit_date} ",
        f"spotforecast version: {self.spotforecast_version} ",
        f"Python version: {self.python_version} ",
        f"Forecaster id: {self.forecaster_id} ",
    ]

    return "\n".join(rows) + "\n"

__setstate__(state)

Custom `__setstate__` to ensure backward compatibility when unpickling. Only sets `__spotforecast_tags__` if not present, preserving custom tags.

Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def __setstate__(self, state: dict) -> None:
    """
    Restore the forecaster from a pickled state.

    Keeps backward compatibility with objects pickled before the tag
    system existed: a default tag dictionary is injected only when the
    unpickled object carries none, so custom tags are never overwritten.
    """
    super().__setstate__(state)

    if hasattr(self, "__spotforecast_tags__"):
        # Tags survived the pickle round-trip; nothing to restore.
        return

    default_tags = {
        "library": "spotforecast",
        "forecaster_name": "ForecasterRecursive",
        "forecaster_task": "regression",
        "forecasting_scope": "single-series",
        "forecasting_strategy": "recursive",
        "index_types_supported": ["pandas.RangeIndex", "pandas.DatetimeIndex"],
        "requires_index_frequency": True,
        "allowed_input_types_series": ["pandas.Series"],
        "supports_exog": True,
        "allowed_input_types_exog": ["pandas.Series", "pandas.DataFrame"],
        "handles_missing_values_series": False,
        "handles_missing_values_exog": True,
        "supports_lags": True,
        "supports_window_features": True,
        "supports_transformer_series": True,
        "supports_transformer_exog": True,
        "supports_weight_func": True,
        "supports_differentiation": True,
        "prediction_types": [
            "point",
            "interval",
            "bootstrapping",
            "quantiles",
            "distribution",
        ],
        "supports_probabilistic": True,
        "probabilistic_methods": ["bootstrapping", "conformal"],
        "handles_binned_residuals": True,
    }
    self.__spotforecast_tags__ = default_tags

create_predict_X(steps, last_window=None, exog=None, check_inputs=True)

Create the predictors needed to predict steps ahead. As it is a recursive process, the predictors are created at each iteration of the prediction process.

Parameters:

Name Type Description Default
steps int

Number of steps to predict. - If steps is int, number of steps to predict. - If str or pandas Datetime, the prediction will be up to that date.

required
last_window Series | DataFrame | None

Series values used to create the predictors (lags) needed in the first iteration of the prediction (t + 1). If last_window = None, the values stored in self.last_window_ are used to calculate the initial predictors, and the predictions start right after training data. Defaults to None.

None
exog Series | DataFrame | None

Exogenous variable/s included as predictor/s. Defaults to None.

None
check_inputs bool

If True, the input is checked for possible warnings and errors with the check_predict_input function. This argument is created for internal use and is not recommended to be changed. Defaults to True.

True

Returns:

Type Description
DataFrame

Pandas DataFrame with the predictors for each step. The index

DataFrame

is the same as the prediction index.

Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def create_predict_X(
    self,
    steps: int,
    last_window: pd.Series | pd.DataFrame | None = None,
    exog: pd.Series | pd.DataFrame | None = None,
    check_inputs: bool = True,
) -> pd.DataFrame:
    """
    Create the predictors needed to predict `steps` ahead. As it is a recursive
    process, the predictors are created at each iteration of the prediction
    process.

    Args:
        steps:
            Number of steps to predict.
            - If steps is int, number of steps to predict.
            - If str or pandas Datetime, the prediction will be up to that date.
        last_window:
            Series values used to create the predictors (lags) needed in the
            first iteration of the prediction (t + 1).
            If `last_window = None`, the values stored in `self.last_window_` are
            used to calculate the initial predictors, and the predictions start
            right after training data. Defaults to None.
        exog:
            Exogenous variable/s included as predictor/s. Defaults to None.
        check_inputs:
            If `True`, the input is checked for possible warnings and errors
            with the `check_predict_input` function. This argument is created
            for internal use and is not recommended to be changed.
            Defaults to True.

    Returns:
        Pandas DataFrame with the predictors for each step. The index
        is the same as the prediction index.
    """

    # Validate and normalize the inputs; `steps` comes back as an int even
    # when a date-like value was passed.
    (
        last_window_values,
        exog_values,
        prediction_index,
        steps,
    ) = self._create_predict_inputs(
        steps=steps,
        last_window=last_window,
        exog=exog,
        check_inputs=check_inputs,
    )

    # Run the recursive prediction loop. Predictors are passed as numpy
    # arrays there, so silence scikit-learn's feature-name warning.
    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore",
            message="X does not have valid feature names",
            category=UserWarning,
        )
        predictions = self._recursive_predict(
            steps=steps,
            last_window_values=last_window_values,
            exog_values=exog_values,
        )

    X_predict = []
    # Window values followed by the predicted values: the single array from
    # which lags and window features for every step are sliced.
    full_predictors = np.concatenate((last_window_values, predictions))

    if self.lags is not None:
        # (steps, n_lags) matrix of offsets counting back from each
        # prediction step; adding len(full_predictors) shifts them to valid
        # non-negative positions in `full_predictors`.
        idx = np.arange(-steps, 0)[:, None] - self.lags
        X_lags = full_predictors[idx + len(full_predictors)]
        X_predict.append(X_lags)

    if self.window_features is not None:
        # NaN-initialized matrix: one row per step, one column per window
        # feature output.
        X_window_features = np.full(
            shape=(steps, len(self.X_train_window_features_names_out_)),
            fill_value=np.nan,
            order="C",
            dtype=float,
        )
        for i in range(steps):
            # For step i, each feature is computed on the slice of values
            # ending just before that step (window plus predictions < i).
            X_window_features[i, :] = np.concatenate(
                [
                    wf.transform(full_predictors[i : -(steps - i)])
                    for wf in self.window_features
                ]
            )
        X_predict.append(X_window_features)

    if exog is not None:
        X_predict.append(exog_values)

    X_predict = pd.DataFrame(
        data=np.concatenate(X_predict, axis=1),
        columns=self.X_train_features_names_out_,
        index=prediction_index,
    )

    if self.exog_in_:
        # np.concatenate upcasts everything to one dtype; restore the
        # training exog dtypes when they include bool or non-numeric columns.
        categorical_features = any(
            not pd.api.types.is_numeric_dtype(dtype)
            or pd.api.types.is_bool_dtype(dtype)
            for dtype in set(self.exog_dtypes_out_.values())
        )
        if categorical_features:
            X_predict = X_predict.astype(self.exog_dtypes_out_)

    # The matrix is NOT inverse-transformed: warn callers that any
    # predictions derived from it stay in the transformed scale.
    if self.transformer_y is not None or self.differentiation is not None:
        warnings.warn(
            "The output matrix is in the transformed scale due to the "
            "inclusion of transformations or differentiation in the Forecaster. "
            "As a result, any predictions generated using this matrix will also "
            "be in the transformed scale. Please refer to the documentation "
            "for more details: "
            "https://skforecast.org/latest/user_guides/training-and-prediction-matrices.html",
            DataTransformationWarning,
        )

    return X_predict

create_sample_weights(X_train)

Create weights for each observation according to the forecaster's attribute weight_func.

Parameters:

Name Type Description Default
X_train DataFrame

DataFrame created with the create_train_X_y method (the first element of its return value).

required

Returns:

Type Description
ndarray

Weights to use in fit method.

Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def create_sample_weights(self, X_train: pd.DataFrame) -> np.ndarray:
    """
    Compute per-observation weights with the forecaster's `weight_func`.

    Args:
        X_train: DataFrame created with the `create_train_X_y` method
            (the first element of its return value). Only its index is
            passed to `weight_func`.

    Returns:
        Weights to use in `fit` method, or None when no `weight_func`
        is configured.
    """

    # Without a weight function there is nothing to compute.
    if self.weight_func is None:
        return None

    weights = self.weight_func(X_train.index)

    # Validate the weights so the estimator never receives an unusable
    # sample_weight vector.
    if weights is not None:
        if np.isnan(weights).any():
            raise ValueError(
                "The resulting `sample_weight` cannot have NaN values."
            )
        if np.any(weights < 0):
            raise ValueError(
                "The resulting `sample_weight` cannot have negative values."
            )
        if np.sum(weights) == 0:
            raise ValueError(
                "The resulting `sample_weight` cannot be normalized because "
                "the sum of the weights is zero."
            )

    return weights

create_train_X_y(y, exog=None)

Public method to create training predictors and target values.

This method is a public wrapper around the internal method _create_train_X_y, which generates the training predictors and target values based on the provided time series and exogenous variables. It ensures that the necessary transformations and feature engineering steps are applied to prepare the data for training the forecaster.

Parameters:

Name Type Description Default
y Series

Target series for training. Must be a pandas Series.

required
exog Union[Series, DataFrame, None]

Optional exogenous variables for training. Can be a pandas Series or DataFrame. Must have the same index as y and cover the same time range. Defaults to None.

None

Returns:

Type Description
Tuple[DataFrame, Series]

Tuple containing: - X_train: DataFrame of training predictors including lags, window features, and exogenous variables (if provided). - y_train: Series of target values aligned with the predictors.

Examples:

>>> import numpy as np
>>> import pandas as pd
>>> from sklearn.linear_model import LinearRegression
>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> from spotforecast2_safe.preprocessing import RollingFeatures
>>> y = pd.Series(np.arange(30), name='y')
>>> exog = pd.DataFrame({'temp': np.random.randn(30)}, index=y.index)
>>> forecaster = ForecasterRecursive(
...     estimator=LinearRegression(),
...     lags=3,
...     window_features=[RollingFeatures(stats='mean', window_sizes=3)]
... )
>>> X_train, y_train = forecaster.create_train_X_y(y=y, exog=exog)
>>> isinstance(X_train, pd.DataFrame)
True
>>> isinstance(y_train, pd.Series)
True
Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def create_train_X_y(
    self, y: pd.Series, exog: Union[pd.Series, pd.DataFrame, None] = None
) -> Tuple[pd.DataFrame, pd.Series]:
    """Build the training predictors and target values for the forecaster.

    Thin public wrapper around the internal `_create_train_X_y` method,
    which performs the transformations and feature engineering (lags,
    window features, exogenous alignment). Only the first two elements of
    the internal result are exposed; the remaining metadata is discarded.

    Args:
        y: Target series for training. Must be a pandas Series.
        exog: Optional exogenous variables for training. Can be a pandas
            Series or DataFrame. Must have the same index as `y` and cover
            the same time range. Defaults to None.

    Returns:
        Tuple containing:
            - X_train: DataFrame of training predictors including lags,
              window features, and exogenous variables (if provided).
            - y_train: Series of target values aligned with the predictors.
    """
    X_train, y_train, *_ = self._create_train_X_y(y=y, exog=exog)
    return X_train, y_train

fit(y, exog=None, store_last_window=True, store_in_sample_residuals=False, random_state=123, suppress_warnings=False)

Fit the forecaster to the training data.

Parameters:

Name Type Description Default
y Series

Target series for training. Must be a pandas Series.

required
exog Union[Series, DataFrame, None]

Optional exogenous variables for training. Can be a pandas Series or DataFrame. Must have the same index as y and cover the same time range. Defaults to None.

None
store_last_window bool

Whether to store the last window of the training series for use in prediction. Defaults to True.

True
store_in_sample_residuals bool

Whether to store in-sample residuals after fitting, which can be used for certain probabilistic prediction methods. Defaults to False.

False
random_state int

Random seed for reproducibility when sampling residuals if store_in_sample_residuals is True. Defaults to 123.

123
suppress_warnings bool

Whether to suppress warnings during fitting, such as those related to insufficient data length for lags or window features. Defaults to False.

False

Returns:

Type Description
None

None

Examples:

>>> import numpy as np
>>> import pandas as pd
>>> from sklearn.linear_model import LinearRegression
>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> from spotforecast2_safe.preprocessing import RollingFeatures
>>> y = pd.Series(np.arange(30), name='y')
>>> exog = pd.DataFrame({'temp': np.random.randn(30)}, index=y.index)
>>> forecaster = ForecasterRecursive(
...     estimator=LinearRegression(),
...     lags=3,
...     window_features=[RollingFeatures(stats='mean', window_sizes=3)]
... )
>>> forecaster.fit(y=y, exog=exog, store_in_sample_residuals=True)
Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def fit(
    self,
    y: pd.Series,
    exog: Union[pd.Series, pd.DataFrame, None] = None,
    store_last_window: bool = True,
    store_in_sample_residuals: bool = False,
    random_state: int = 123,
    suppress_warnings: bool = False,
) -> None:
    """
    Fit the forecaster to the training data.

    Args:
        y: Target series for training. Must be a pandas Series.
        exog: Optional exogenous variables for training. Can be a pandas
            Series or DataFrame. Must have the same index as `y` and cover
            the same time range. Defaults to None.
        store_last_window: Whether to store the last window of the training
            series for use in prediction. Defaults to True.
        store_in_sample_residuals: Whether to store in-sample residuals
            after fitting, which can be used for certain probabilistic
            prediction methods. Defaults to False.
        random_state: Random seed for reproducibility when sampling
            residuals if `store_in_sample_residuals` is True. Defaults to 123.
        suppress_warnings: Whether to suppress warnings during fitting, such
            as those related to insufficient data length for lags or window
            features. Defaults to False.

    Returns:
        None

    Examples:
        >>> import numpy as np
        >>> import pandas as pd
        >>> from sklearn.linear_model import LinearRegression
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> from spotforecast2_safe.preprocessing import RollingFeatures
        >>> y = pd.Series(np.arange(30), name='y')
        >>> exog = pd.DataFrame({'temp': np.random.randn(30)}, index=y.index)
        >>> forecaster = ForecasterRecursive(
        ...     estimator=LinearRegression(),
        ...     lags=3,
        ...     window_features=[RollingFeatures(stats='mean', window_sizes=3)]
        ... )
        >>> forecaster.fit(y=y, exog=exog, store_in_sample_residuals=True)
    """

    # Optionally silence warnings for the duration of the fit; the filter
    # is restored at the end of this method.
    set_skforecast_warnings(suppress_warnings, action="ignore")

    # Reset values in case the forecaster has already been fitted.
    self.last_window_ = None
    self.index_type_ = None
    self.index_freq_ = None
    self.training_range_ = None
    self.series_name_in_ = None
    self.exog_in_ = False
    self.exog_names_in_ = None
    self.exog_type_in_ = None
    self.exog_dtypes_in_ = None
    self.exog_dtypes_out_ = None
    self.X_train_window_features_names_out_ = None
    self.X_train_exog_names_out_ = None
    self.X_train_features_names_out_ = None
    self.in_sample_residuals_ = None
    self.in_sample_residuals_by_bin_ = None
    self.binner_intervals_ = None
    self.is_fitted = False
    self.fit_date = None

    # Build the training matrices together with the feature-name and dtype
    # metadata produced during feature engineering.
    (
        X_train,
        y_train,
        exog_names_in_,
        X_train_window_features_names_out_,
        X_train_exog_names_out_,
        X_train_features_names_out_,
        exog_dtypes_in_,
        exog_dtypes_out_,
    ) = self._create_train_X_y(y=y, exog=exog)

    sample_weight = self.create_sample_weights(X_train=X_train)

    # Pass `sample_weight` to the estimator only when a weight function
    # actually produced weights.
    if sample_weight is not None:
        self.estimator.fit(
            X=X_train,
            y=y_train,
            sample_weight=sample_weight,
            **self.fit_kwargs,
        )
    else:
        self.estimator.fit(X=X_train, y=y_train, **self.fit_kwargs)

    self.X_train_window_features_names_out_ = X_train_window_features_names_out_
    self.X_train_features_names_out_ = X_train_features_names_out_

    self.is_fitted = True
    self.fit_date = pd.Timestamp.today().strftime("%Y-%m-%d %H:%M:%S")
    # First and last index values of the training data.
    self.training_range_ = y.index[[0, -1]]
    self.index_type_ = type(y.index)
    # Index frequency: `freqstr` for a DatetimeIndex, `step` for a
    # RangeIndex, otherwise None.
    if isinstance(y.index, pd.DatetimeIndex):
        self.index_freq_ = y.index.freqstr
    else:
        try:
            self.index_freq_ = y.index.step
        except AttributeError:
            self.index_freq_ = None

    # Exogenous metadata is stored only when exog was actually provided.
    if exog is not None:
        self.exog_in_ = True
        self.exog_type_in_ = type(exog)
        self.exog_names_in_ = exog_names_in_
        self.exog_dtypes_in_ = exog_dtypes_in_
        self.exog_dtypes_out_ = exog_dtypes_out_
        self.X_train_exog_names_out_ = X_train_exog_names_out_

    self.series_name_in_ = y.name if y.name is not None else "y"

    # NOTE: This is done to save time during fit in functions such as backtesting()
    if self._probabilistic_mode is not False:
        self._binning_in_sample_residuals(
            y_true=y_train.to_numpy(),
            y_pred=self.estimator.predict(X_train).ravel(),
            store_in_sample_residuals=store_in_sample_residuals,
            random_state=random_state,
        )

    # Keep the trailing `window_size` values: they seed the lags for the
    # first iteration of a later `predict` call.
    if store_last_window:
        self.last_window_ = (
            y.iloc[-self.window_size :]
            .copy()
            .to_frame(name=y.name if y.name is not None else "y")
        )

    set_skforecast_warnings(suppress_warnings, action="default")

get_feature_importances(sort_importance=True)

Return feature importances of the estimator stored in the forecaster. Only valid when estimator stores internally the feature importances in the attribute feature_importances_ or coef_. Otherwise, returns None.

Parameters:

Name Type Description Default
sort_importance bool

If True, sorts the feature importances in descending order.

True

Returns:

Type Description
DataFrame

pd.DataFrame: Feature importances associated with each predictor.

Raises:

Type Description
NotFittedError

If the forecaster is not fitted.

Examples:

>>> from sklearn.linear_model import LinearRegression
>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> import pandas as pd
>>> import numpy as np
>>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
>>> forecaster.fit(y=pd.Series(np.arange(20)))
>>> forecaster.get_feature_importances()
  feature  importance
0   lag_1         1.0
1   lag_2         0.0
2   lag_3         0.0
Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def get_feature_importances(self, sort_importance: bool = True) -> pd.DataFrame | None:
    """
    Return feature importances of the estimator stored in the forecaster.
    Only valid when the estimator stores internally the feature importances
    in the attribute `feature_importances_` or `coef_`. Otherwise, a warning
    is emitted and `None` is returned.

    Args:
        sort_importance: If `True`, sorts the feature importances in descending order.

    Returns:
        Feature importances associated with each predictor, or `None` when
        the estimator exposes neither `feature_importances_` nor `coef_`.

    Raises:
        NotFittedError: If the forecaster is not fitted.

    Examples:
        >>> from sklearn.linear_model import LinearRegression
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> import pandas as pd
        >>> import numpy as np
        >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
        >>> forecaster.fit(y=pd.Series(np.arange(20)))
        >>> forecaster.get_feature_importances()
          feature  importance
        0   lag_1         1.0
        1   lag_2         0.0
        2   lag_3         0.0
    """

    if not self.is_fitted:
        raise NotFittedError(
            "This forecaster is not fitted yet. Call `fit` with appropriate "
            "arguments before using `get_feature_importances()`."
        )

    # For pipelines, the importances live on the final (fitted) step.
    if isinstance(self.estimator, Pipeline):
        estimator = self.estimator[-1]
    else:
        estimator = self.estimator

    # Tree-based estimators expose `feature_importances_`; linear models
    # expose `coef_`. Anything else cannot be introspected here.
    if hasattr(estimator, "feature_importances_"):
        feature_importances = estimator.feature_importances_
    elif hasattr(estimator, "coef_"):
        feature_importances = estimator.coef_
    else:
        warnings.warn(
            f"Impossible to access feature importances for estimator of type "
            f"{type(estimator)}. This method is only valid when the "
            f"estimator stores internally the feature importances in the "
            f"attribute `feature_importances_` or `coef_`.",
            UserWarning,
        )
        feature_importances = None

    if feature_importances is not None:
        feature_importances = pd.DataFrame(
            {
                "feature": self.X_train_features_names_out_,
                "importance": feature_importances,
            }
        )
        if sort_importance:
            feature_importances = feature_importances.sort_values(
                by="importance", ascending=False
            )

    return feature_importances

get_params(deep=True)

Get parameters for this forecaster.

Parameters:

Name Type Description Default
deep bool

If True, will return the parameters for this forecaster and contained sub-objects that are estimators.

True

Returns:

Name Type Description
params Dict[str, object]

Dictionary of parameter names mapped to their values.

Examples:

>>> from sklearn.linear_model import LinearRegression
>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
>>> forecaster.get_params()
{
    'estimator': LinearRegression(), 'lags': 3, 'window_features': None,
    'transformer_y': None, 'transformer_exog': None, 'weight_func': None,
    'differentiation': None, 'fit_kwargs': {}, 'binner_kwargs': None, 'forecaster_id': '...'}
Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def get_params(self, deep: bool = True) -> Dict[str, object]:
    """
    Get parameters for this forecaster.

    Args:
        deep: If True, will return the parameters for this forecaster and
            contained sub-objects that are estimators.

    Returns:
        params: Dictionary of parameter names mapped to their values. When
            `deep=True`, nested estimator parameters appear under the
            sklearn-style `estimator__<name>` keys.
    """
    constructor_keys = (
        "estimator",
        "lags",
        "window_features",
        "transformer_y",
        "transformer_exog",
        "weight_func",
        "differentiation",
        "fit_kwargs",
        "binner_kwargs",
        "forecaster_id",
    )
    # Report only the attributes actually present on the instance.
    params = {
        key: getattr(self, key) for key in constructor_keys if hasattr(self, key)
    }

    if deep:
        estimator = getattr(self, "estimator", None)
        if estimator is not None and hasattr(estimator, "get_params"):
            # Expose nested estimator parameters using sklearn's
            # `estimator__<param>` naming convention.
            params.update(
                {
                    f"estimator__{name}": value
                    for name, value in estimator.get_params(deep=True).items()
                }
            )

    return params

predict(steps, last_window=None, exog=None, check_inputs=True)

Predict future values recursively for the specified number of steps.

Parameters:

Name Type Description Default
steps int | str | Timestamp

Number of future steps to predict.

required
last_window Union[Series, DataFrame, None]

Optional last window of observed values to use for prediction. If None, uses the last window from training. Must be a pandas Series or DataFrame with the same structure as the training target series. Defaults to None.

None
exog Union[Series, DataFrame, None]

Optional exogenous variables for prediction. Can be a pandas Series or DataFrame. Must have the same structure as the exogenous variables used in training. Defaults to None.

None
check_inputs bool

Whether to perform input validation checks. Defaults to True.

True

Returns:

Type Description
Series

Pandas Series of predicted values for the specified number of steps,

Series

indexed according to the prediction index constructed from the last window and the number of steps.

Examples:

>>> import numpy as np
>>> import pandas as pd
>>> from sklearn.linear_model import LinearRegression
>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> from spotforecast2_safe.preprocessing import RollingFeatures
>>> y = pd.Series(np.arange(30), name='y')
>>> exog = pd.DataFrame({'temp': np.random.randn(30)}, index=y.index)
>>> forecaster = ForecasterRecursive(
...     estimator=LinearRegression(),
...     lags=3,
...     window_features=[RollingFeatures(stats='mean', window_sizes=3)]
... )
>>> forecaster.fit(y=y, exog=exog)
>>> last_window = y.iloc[-3:]
>>> exog_future = pd.DataFrame({'temp': np.random.randn(5)}, index=pd.RangeIndex(start=30, stop=35))
>>> predictions = forecaster.predict(
...     steps=5, last_window=last_window, exog=exog_future, check_inputs=True
... )
>>> isinstance(predictions, pd.Series)
True
Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def predict(
    self,
    steps: int | str | pd.Timestamp,
    last_window: Union[pd.Series, pd.DataFrame, None] = None,
    exog: Union[pd.Series, pd.DataFrame, None] = None,
    check_inputs: bool = True,
) -> pd.Series:
    """
    Predict future values recursively for the specified number of steps.

    Args:
        steps: Number of future steps to predict, or a date-like value up
            to which to predict.
        last_window: Optional last window of observed values to use for
            prediction. If None, the last window stored during training is
            used and predictions start right after the training data.
            Defaults to None.
        exog: Optional exogenous variables for prediction. Can be a pandas
            Series or DataFrame with the same structure as the exogenous
            variables used in training. Defaults to None.
        check_inputs: Whether to perform input validation checks.
            Defaults to True.

    Returns:
        Pandas Series of predicted values (named "pred"), indexed by the
        prediction index derived from the last window and `steps`.
    """

    # Validate and normalize inputs; `steps` comes back as an int even when
    # a date-like value was passed.
    (
        last_window_values,
        exog_values,
        prediction_index,
        steps,
    ) = self._create_predict_inputs(
        steps=steps,
        last_window=last_window,
        exog=exog,
        check_inputs=check_inputs,
    )

    # Predictors are numpy arrays inside the recursive loop, so silence
    # scikit-learn's feature-name warning for its duration.
    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore",
            message="X does not have valid feature names",
            category=UserWarning,
        )
        raw_predictions = self._recursive_predict(
            steps=steps,
            last_window_values=last_window_values,
            exog_values=exog_values,
        )

    # Map predictions back to the original scale: undo differentiation
    # first (if any), then invert the y transformation.
    if self.differentiation is not None:
        raw_predictions = self.differentiator.inverse_transform_next_window(
            raw_predictions
        )

    raw_predictions = transform_numpy(
        array=raw_predictions,
        transformer=self.transformer_y,
        fit=False,
        inverse_transform=True,
    )

    return pd.Series(data=raw_predictions, index=prediction_index, name="pred")

predict_bootstrapping(steps, last_window=None, exog=None, n_boot=250, use_in_sample_residuals=True, use_binned_residuals=True, random_state=123)

Generate multiple forecasting predictions using a bootstrapping process. By sampling from a collection of past observed errors (the residuals), each iteration of bootstrapping generates a different set of predictions. See the References section for more information.

Parameters:

Name Type Description Default
steps int | str | Timestamp

Number of steps to predict. - If steps is int, number of steps to predict. - If str or pandas Datetime, the prediction will be up to that date.

required
last_window Series | DataFrame | None

Series values used to create the predictors (lags) needed in the first iteration of the prediction (t + 1). If last_window = None, the values stored in self.last_window_ are used to calculate the initial predictors, and the predictions start right after training data. Defaults to None.

None
exog Series | DataFrame | None

Exogenous variable/s included as predictor/s. Defaults to None.

None
n_boot int

Number of bootstrapping iterations to perform when estimating prediction intervals. Defaults to 250.

250
use_in_sample_residuals bool

If True, residuals from the training data are used as proxy of prediction error to create predictions. If False, out of sample residuals (calibration) are used. Out-of-sample residuals must be precomputed using Forecaster's set_out_sample_residuals() method. Defaults to True.

True
use_binned_residuals bool

If True, residuals are selected based on the predicted values (binned selection). If False, residuals are selected randomly. Defaults to True.

True
random_state int

Seed for the random number generator to ensure reproducibility. Defaults to 123.

123

Returns:

Type Description
DataFrame

Pandas DataFrame with predictions generated by bootstrapping. Shape: (steps, n_boot).

Raises:

Type Description
ValueError

If steps is not an integer or a valid date.

ValueError

If exog is missing or has invalid shape.

ValueError

If n_boot is not a positive integer.

ValueError

If use_in_sample_residuals=True and in_sample_residuals_ are not available.

ValueError

If use_in_sample_residuals=False and out_sample_residuals_ are not available.

Examples:

>>> import numpy as np
>>> import pandas as pd
>>> from sklearn.linear_model import LinearRegression
>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> rng = np.random.default_rng(123)
>>> y = pd.Series(rng.normal(size=100), name='y')
>>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
>>> _ = forecaster.fit(y=y)
>>> boot_preds = forecaster.predict_bootstrapping(steps=3, n_boot=5)
>>> boot_preds.shape
(3, 5)
References

.. [1] Forecasting: Principles and Practice (3rd ed) Rob J Hyndman and George Athanasopoulos. https://otexts.com/fpp3/prediction-intervals.html

Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def predict_bootstrapping(
    self,
    steps: int | str | pd.Timestamp,
    last_window: pd.Series | pd.DataFrame | None = None,
    exog: pd.Series | pd.DataFrame | None = None,
    n_boot: int = 250,
    use_in_sample_residuals: bool = True,
    use_binned_residuals: bool = True,
    random_state: int = 123,
) -> pd.DataFrame:
    """
    Generate multiple forecasting predictions using a bootstrapping process.

    Each bootstrap iteration perturbs the recursive predictions with a
    resampled past error (residual), producing a different prediction path.
    See the References section for more information.

    Args:
        steps:
            Number of steps to predict.
            - If steps is int, number of steps to predict.
            - If str or pandas Datetime, the prediction will be up to that date.
        last_window:
            Series values used to create the predictors (lags) needed in the
            first iteration of the prediction (t + 1). If `last_window = None`,
            the values stored in `self.last_window_` are used to calculate the
            initial predictors, and the predictions start right after training
            data. Defaults to None.
        exog:
            Exogenous variable/s included as predictor/s. Defaults to None.
        n_boot:
            Number of bootstrapping iterations to perform when estimating
            prediction intervals. Defaults to 250.
        use_in_sample_residuals:
            If `True`, residuals from the training data are used as proxy of
            prediction error to create predictions. If `False`, out of sample
            residuals (calibration) are used. Out-of-sample residuals must be
            precomputed using Forecaster's `set_out_sample_residuals()` method.
            Defaults to True.
        use_binned_residuals:
            If `True`, residuals are selected based on the predicted values
            (binned selection). If `False`, residuals are selected randomly.
            Defaults to True.
        random_state:
            Seed for the random number generator to ensure reproducibility.
            Defaults to 123.

    Returns:
        Pandas DataFrame with predictions generated by bootstrapping.
        Shape: (steps, n_boot).

    Raises:
        ValueError:
            If `steps` is not an integer or a valid date.
        ValueError:
            If `exog` is missing or has invalid shape.
        ValueError:
            If `n_boot` is not a positive integer.
        ValueError:
            If `use_in_sample_residuals=True` and `in_sample_residuals_` are
            not available.
        ValueError:
            If `use_in_sample_residuals=False` and `out_sample_residuals_` are
            not available.

    Examples:
        >>> import numpy as np
        >>> import pandas as pd
        >>> from sklearn.linear_model import LinearRegression
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> rng = np.random.default_rng(123)
        >>> y = pd.Series(rng.normal(size=100), name='y')
        >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
        >>> _ = forecaster.fit(y=y)
        >>> boot_preds = forecaster.predict_bootstrapping(steps=3, n_boot=5)
        >>> boot_preds.shape
        (3, 5)

    References:
        .. [1] Forecasting: Principles and Practice (3rd ed) Rob J Hyndman and George Athanasopoulos.
               https://otexts.com/fpp3/prediction-intervals.html
    """

    last_window_values, exog_values, prediction_index, steps = (
        self._create_predict_inputs(
            steps=steps,
            last_window=last_window,
            exog=exog,
            predict_probabilistic=True,
            use_in_sample_residuals=use_in_sample_residuals,
            use_binned_residuals=use_binned_residuals,
            check_inputs=True,
        )
    )

    # Select the residual pool according to the requested source.
    if use_in_sample_residuals:
        residuals = self.in_sample_residuals_
        residuals_by_bin = self.in_sample_residuals_by_bin_
    else:
        residuals = self.out_sample_residuals_
        residuals_by_bin = self.out_sample_residuals_by_bin_

    rng = np.random.default_rng(seed=random_state)
    if use_binned_residuals:
        # Build a 3D array of resampled residuals: (n_bins, steps, n_boot).
        per_bin_samples = []
        for k in range(len(residuals_by_bin)):
            bin_residuals = residuals_by_bin[k]
            draw_idx = rng.integers(
                low=0, high=len(bin_residuals), size=(steps, n_boot)
            )
            per_bin_samples.append(bin_residuals[draw_idx])
        sampled_residuals = np.stack(per_bin_samples, axis=0)
    else:
        # Uniform sampling from the whole residual pool: (steps, n_boot).
        draw_idx = rng.integers(low=0, high=len(residuals), size=(steps, n_boot))
        sampled_residuals = residuals[draw_idx]

    with warnings.catch_warnings():
        # The estimator is fed raw numpy arrays during recursion, so sklearn's
        # feature-name warning is expected and silenced here.
        warnings.filterwarnings(
            "ignore",
            message="X does not have valid feature names",
            category=UserWarning,
        )
        boot_predictions = self._recursive_predict_bootstrapping(
            steps=steps,
            last_window_values=last_window_values,
            exog_values=exog_values,
            sampled_residuals=sampled_residuals,
            use_binned_residuals=use_binned_residuals,
            n_boot=n_boot,
        )

    # Undo the fit-time preprocessing in reverse order: differencing first,
    # then the target transformation.
    if self.differentiation is not None:
        boot_predictions = self.differentiator.inverse_transform_next_window(
            boot_predictions
        )

    if self.transformer_y:
        boot_predictions = transform_numpy(
            array=boot_predictions,
            transformer=self.transformer_y,
            fit=False,
            inverse_transform=True,
        )

    return pd.DataFrame(
        data=boot_predictions,
        index=prediction_index,
        columns=[f"pred_boot_{i}" for i in range(n_boot)],
    )

predict_dist(steps, distribution, last_window=None, exog=None, n_boot=250, use_in_sample_residuals=True, use_binned_residuals=True, random_state=123)

Fit a given probability distribution for each step. After generating multiple forecasting predictions through a bootstrapping process, each step is fitted to the given distribution.

Parameters:

Name Type Description Default
steps int | str | Timestamp

Number of steps to predict. - If steps is int, number of steps to predict. - If str or pandas Datetime, the prediction will be up to that date.

required
distribution object

A distribution object from scipy.stats with methods _pdf and fit. For example scipy.stats.norm.

required
last_window Series | DataFrame | None

Series values used to create the predictors (lags) needed in the first iteration of the prediction (t + 1). If last_window = None, the values stored in self.last_window_ are used to calculate the initial predictors, and the predictions start right after training data.

None
exog Series | DataFrame | None

Exogenous variable/s included as predictor/s.

None
n_boot int

Number of bootstrapping iterations to perform when estimating prediction intervals.

250
use_in_sample_residuals bool

If True, residuals from the training data are used as proxy of prediction error to create predictions. If False, out of sample residuals (calibration) are used. Out-of-sample residuals must be precomputed using Forecaster's set_out_sample_residuals() method.

True
use_binned_residuals bool

If True, residuals are selected based on the predicted values (binned selection). If False, residuals are selected randomly.

True
random_state int

Seed for the random number generator to ensure reproducibility.

123

Returns:

Type Description
DataFrame

Distribution parameters estimated for each step.

Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def predict_dist(
    self,
    steps: int | str | pd.Timestamp,
    distribution: object,
    last_window: pd.Series | pd.DataFrame | None = None,
    exog: pd.Series | pd.DataFrame | None = None,
    n_boot: int = 250,
    use_in_sample_residuals: bool = True,
    use_binned_residuals: bool = True,
    random_state: int = 123,
) -> pd.DataFrame:
    """
    Fit a given probability distribution for each step.

    Bootstrapped predictions are generated first; the samples of each step
    are then fitted to the given distribution and the estimated parameters
    are returned.

    Args:
        steps: Number of steps to predict.
            - If steps is int, number of steps to predict.
            - If str or pandas Datetime, the prediction will be up to that date.
        distribution: A distribution object from scipy.stats with methods `_pdf`
            and `fit`. For example scipy.stats.norm.
        last_window: Series values used to create the predictors (lags) needed in the
            first iteration of the prediction (t + 1).
            If `last_window = None`, the values stored in `self.last_window_` are
            used to calculate the initial predictors, and the predictions start
            right after training data.
        exog: Exogenous variable/s included as predictor/s.
        n_boot: Number of bootstrapping iterations to perform when estimating
            prediction intervals.
        use_in_sample_residuals: If `True`, residuals from the training data are
            used as proxy of prediction error to create predictions.
            If `False`, out of sample residuals (calibration) are used.
            Out-of-sample residuals must be precomputed using Forecaster's
            `set_out_sample_residuals()` method.
        use_binned_residuals: If `True`, residuals are selected based on the
            predicted values (binned selection).
            If `False`, residuals are selected randomly.
        random_state: Seed for the random number generator to ensure reproducibility.

    Returns:
        Distribution parameters estimated for each step.

    Raises:
        TypeError: If `distribution` lacks the `_pdf` or `fit` methods.
    """

    # Validate the duck-typed scipy.stats interface before doing any work.
    has_pdf = hasattr(distribution, "_pdf")
    has_fit = callable(getattr(distribution, "fit", None))
    if not (has_pdf and has_fit):
        raise TypeError(
            "`distribution` must be a valid probability distribution object "
            "from scipy.stats, with methods `_pdf` and `fit`."
        )

    predictions = self.predict_bootstrapping(
        steps=steps,
        last_window=last_window,
        exog=exog,
        n_boot=n_boot,
        random_state=random_state,
        use_in_sample_residuals=use_in_sample_residuals,
        use_binned_residuals=use_binned_residuals,
    )

    # Shape parameters come from the `_pdf` signature (skipping the data
    # argument `x`); `loc` and `scale` are always estimated by `fit`.
    pdf_parameters = inspect.signature(distribution._pdf).parameters
    param_names = [name for name in pdf_parameters if name != "x"]
    param_names = param_names + ["loc", "scale"]

    # Fit the distribution row-wise: each row holds the n_boot samples of
    # one forecasted step.
    predictions[param_names] = predictions.apply(
        lambda row: distribution.fit(row), axis=1, result_type="expand"
    )

    return predictions[param_names]

predict_interval(steps, last_window=None, exog=None, method='bootstrapping', interval=[5, 95], n_boot=250, use_in_sample_residuals=True, use_binned_residuals=True, random_state=123)

Predict n steps ahead and estimate prediction intervals using either bootstrapping or conformal prediction methods. Refer to the References section for additional details on these methods.

Parameters:

Name Type Description Default
steps int | str | Timestamp

Number of steps to predict. - If steps is int, number of steps to predict. - If str or pandas Datetime, the prediction will be up to that date.

required
last_window Series | DataFrame | None

Series values used to create the predictors (lags) needed in the first iteration of the prediction (t + 1). If last_window = None, the values stored in self.last_window_ are used to calculate the initial predictors, and the predictions start right after training data. Defaults to None.

None
exog Series | DataFrame | None

Exogenous variable/s included as predictor/s. Defaults to None.

None
method str

Technique used to estimate prediction intervals. Available options: - 'bootstrapping': Bootstrapping is used to generate prediction intervals [1]. - 'conformal': Employs the conformal prediction split method for interval estimation [2]. Defaults to 'bootstrapping'.

'bootstrapping'
interval float | list[float] | tuple[float]

Confidence level of the prediction interval. Interpretation depends on the method used: - If float, represents the nominal (expected) coverage (between 0 and 1). For instance, interval=0.95 corresponds to [2.5, 97.5] percentiles. - If list or tuple, defines the exact percentiles to compute, which must be between 0 and 100 inclusive. For example, interval of 95% should be as interval = [2.5, 97.5]. - When using method='conformal', the interval must be a float or a list/tuple defining a symmetric interval. Defaults to [5, 95].

[5, 95]
n_boot int

Number of bootstrapping iterations to perform when estimating prediction intervals. Defaults to 250.

250
use_in_sample_residuals bool

If True, residuals from the training data are used as proxy of prediction error to create predictions. If False, out of sample residuals (calibration) are used. Out-of-sample residuals must be precomputed using Forecaster's set_out_sample_residuals() method. Defaults to True.

True
use_binned_residuals bool

If True, residuals are selected based on the predicted values (binned selection). If False, residuals are selected randomly. Defaults to True.

True
random_state int

Seed for the random number generator to ensure reproducibility. Defaults to 123.

123

Returns:

Type Description
DataFrame

Pandas DataFrame with values predicted by the forecaster and their estimated interval.

DataFrame
  • pred: predictions.
DataFrame
  • lower_bound: lower bound of the interval.
DataFrame
  • upper_bound: upper bound of the interval.

Raises:

Type Description
ValueError

If method is not 'bootstrapping' or 'conformal'.

ValueError

If interval is invalid or not compatible with the chosen method.

ValueError

If inputs (steps, exog, etc.) are invalid.

Examples:

>>> import numpy as np
>>> import pandas as pd
>>> from sklearn.linear_model import LinearRegression
>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> rng = np.random.default_rng(123)
>>> y = pd.Series(rng.normal(size=100), name='y')
>>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
>>> _ = forecaster.fit(y=y)
>>> # Bootstrapping method
>>> intervals_boot = forecaster.predict_interval(
...     steps=3, method='bootstrapping', interval=[5, 95]
... )
>>> intervals_boot.columns.tolist()
['pred', 'lower_bound', 'upper_bound']
>>> # Conformal method
>>> intervals_conf = forecaster.predict_interval(
...     steps=3, method='conformal', interval=0.95
... )
>>> intervals_conf.columns.tolist()
['pred', 'lower_bound', 'upper_bound']
References

.. [1] Forecasting: Principles and Practice (3rd ed) Rob J Hyndman and George Athanasopoulos. https://otexts.com/fpp3/prediction-intervals.html .. [2] MAPIE - Model Agnostic Prediction Interval Estimator. https://mapie.readthedocs.io/en/stable/theoretical_description_regression.html#the-split-method

Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def predict_interval(
    self,
    steps: int | str | pd.Timestamp,
    last_window: pd.Series | pd.DataFrame | None = None,
    exog: pd.Series | pd.DataFrame | None = None,
    method: str = "bootstrapping",
    interval: float | list[float] | tuple[float] = (5, 95),
    n_boot: int = 250,
    use_in_sample_residuals: bool = True,
    use_binned_residuals: bool = True,
    random_state: int = 123,
) -> pd.DataFrame:
    """
    Predict n steps ahead and estimate prediction intervals using either
    bootstrapping or conformal prediction methods. Refer to the References
    section for additional details on these methods.

    Args:
        steps:
            Number of steps to predict.
            - If steps is int, number of steps to predict.
            - If str or pandas Datetime, the prediction will be up to that date.
        last_window:
            Series values used to create the predictors (lags) needed in the
            first iteration of the prediction (t + 1).
            If `last_window = None`, the values stored in `self.last_window_` are
            used to calculate the initial predictors, and the predictions start
            right after training data. Defaults to None.
        exog:
            Exogenous variable/s included as predictor/s. Defaults to None.
        method:
            Technique used to estimate prediction intervals. Available options:
            - 'bootstrapping': Bootstrapping is used to generate prediction
              intervals [1]_.
            - 'conformal': Employs the conformal prediction split method for
              interval estimation [2]_.
            Defaults to 'bootstrapping'.
        interval:
            Confidence level of the prediction interval. Interpretation depends
            on the method used:
            - If `float`, represents the nominal (expected) coverage (between 0
              and 1). For instance, `interval=0.95` corresponds to `[2.5, 97.5]`
              percentiles.
            - If `list` or `tuple`, defines the exact percentiles to compute, which
              must be between 0 and 100 inclusive. For example, interval
              of 95% should be as `interval = [2.5, 97.5]`.
            - When using `method='conformal'`, the interval must be a float or
              a list/tuple defining a symmetric interval.
            Defaults to (5, 95).
        n_boot:
            Number of bootstrapping iterations to perform when estimating prediction
            intervals. Defaults to 250.
        use_in_sample_residuals:
            If `True`, residuals from the training data are used as proxy of
            prediction error to create predictions.
            If `False`, out of sample residuals (calibration) are used.
            Out-of-sample residuals must be precomputed using Forecaster's
            `set_out_sample_residuals()` method. Defaults to True.
        use_binned_residuals:
            If `True`, residuals are selected based on the predicted values
            (binned selection).
            If `False`, residuals are selected randomly. Defaults to True.
        random_state:
            Seed for the random number generator to ensure reproducibility. Defaults to 123.

    Returns:
        Pandas DataFrame with values predicted by the forecaster and their estimated interval.
        - pred: predictions.
        - lower_bound: lower bound of the interval.
        - upper_bound: upper bound of the interval.

    Raises:
        ValueError:
            If `method` is not 'bootstrapping' or 'conformal'.
        ValueError:
            If `interval` is invalid or not compatible with the chosen method.
        ValueError:
            If inputs (`steps`, `exog`, etc.) are invalid.

    Examples:
        >>> import numpy as np
        >>> import pandas as pd
        >>> from sklearn.linear_model import LinearRegression
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> rng = np.random.default_rng(123)
        >>> y = pd.Series(rng.normal(size=100), name='y')
        >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
        >>> _ = forecaster.fit(y=y)
        >>> # Bootstrapping method
        >>> intervals_boot = forecaster.predict_interval(
        ...     steps=3, method='bootstrapping', interval=[5, 95]
        ... )
        >>> intervals_boot.columns.tolist()
        ['pred', 'lower_bound', 'upper_bound']

        >>> # Conformal method
        >>> intervals_conf = forecaster.predict_interval(
        ...     steps=3, method='conformal', interval=0.95
        ... )
        >>> intervals_conf.columns.tolist()
        ['pred', 'lower_bound', 'upper_bound']

    References:
        .. [1] Forecasting: Principles and Practice (3rd ed) Rob J Hyndman and George Athanasopoulos.
               https://otexts.com/fpp3/prediction-intervals.html
        .. [2] MAPIE - Model Agnostic Prediction Interval Estimator.
               https://mapie.readthedocs.io/en/stable/theoretical_description_regression.html#the-split-method
    """
    # NOTE: the default `interval` is an immutable tuple (not a list) to avoid
    # the shared-mutable-default-argument pitfall; the body only rebinds it.

    if method == "bootstrapping":

        if isinstance(interval, (list, tuple)):
            check_interval(interval=interval, ensure_symmetric_intervals=False)
            # Percentile bounds are given on a 0-100 scale; pandas quantile
            # expects fractions in [0, 1].
            interval = np.array(interval) / 100
        else:
            check_interval(alpha=interval, alpha_literal="interval")
            # Nominal coverage -> symmetric quantile pair around the median.
            interval = np.array([0.5 - interval / 2, 0.5 + interval / 2])

        boot_predictions = self.predict_bootstrapping(
            steps=steps,
            last_window=last_window,
            exog=exog,
            n_boot=n_boot,
            random_state=random_state,
            use_in_sample_residuals=use_in_sample_residuals,
            use_binned_residuals=use_binned_residuals,
        )

        # Inputs were already validated by predict_bootstrapping.
        predictions = self.predict(
            steps=steps, last_window=last_window, exog=exog, check_inputs=False
        )

        # Quantiles across bootstrap samples (axis=1) give the interval bounds
        # for each step.
        predictions_interval = boot_predictions.quantile(
            q=interval, axis=1
        ).transpose()
        predictions_interval.columns = ["lower_bound", "upper_bound"]
        predictions = pd.concat((predictions, predictions_interval), axis=1)

    elif method == "conformal":

        if isinstance(interval, (list, tuple)):
            check_interval(interval=interval, ensure_symmetric_intervals=True)
            # Symmetric percentile pair -> nominal coverage fraction.
            nominal_coverage = (interval[1] - interval[0]) / 100
        else:
            check_interval(alpha=interval, alpha_literal="interval")
            nominal_coverage = interval

        predictions = self._predict_interval_conformal(
            steps=steps,
            last_window=last_window,
            exog=exog,
            nominal_coverage=nominal_coverage,
            use_in_sample_residuals=use_in_sample_residuals,
            use_binned_residuals=use_binned_residuals,
        )
    else:
        raise ValueError(
            f"Invalid `method` '{method}'. Choose 'bootstrapping' or 'conformal'."
        )

    return predictions

predict_quantiles(steps, last_window=None, exog=None, quantiles=[0.05, 0.5, 0.95], n_boot=250, use_in_sample_residuals=True, use_binned_residuals=True, random_state=123)

Calculate the specified quantiles for each step. After generating multiple forecasting predictions through a bootstrapping process, each quantile is calculated for each step.

Parameters:

Name Type Description Default
steps int | str | Timestamp

Number of steps to predict. - If steps is int, number of steps to predict. - If str or pandas Datetime, the prediction will be up to that date.

required
last_window Series | DataFrame | None

Series values used to create the predictors (lags) needed in the first iteration of the prediction (t + 1). If last_window = None, the values stored in self.last_window_ are used to calculate the initial predictors, and the predictions start right after training data.

None
exog Series | DataFrame | None

Exogenous variable/s included as predictor/s.

None
quantiles list[float] | tuple[float]

Sequence of quantiles to compute, which must be between 0 and 1 inclusive. For example, quantiles of 0.05, 0.5 and 0.95 should be as quantiles = [0.05, 0.5, 0.95].

[0.05, 0.5, 0.95]
n_boot int

Number of bootstrapping iterations to perform when estimating quantiles.

250
use_in_sample_residuals bool

If True, residuals from the training data are used as proxy of prediction error to create predictions. If False, out of sample residuals (calibration) are used. Out-of-sample residuals must be precomputed using Forecaster's set_out_sample_residuals() method.

True
use_binned_residuals bool

If True, residuals are selected based on the predicted values (binned selection). If False, residuals are selected randomly.

True
random_state int

Seed for the random number generator to ensure reproducibility.

123

Returns:

Type Description
DataFrame

Quantiles predicted by the forecaster.

Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def predict_quantiles(
    self,
    steps: int | str | pd.Timestamp,
    last_window: pd.Series | pd.DataFrame | None = None,
    exog: pd.Series | pd.DataFrame | None = None,
    quantiles: list[float] | tuple[float] = (0.05, 0.5, 0.95),
    n_boot: int = 250,
    use_in_sample_residuals: bool = True,
    use_binned_residuals: bool = True,
    random_state: int = 123,
) -> pd.DataFrame:
    """
    Calculate the specified quantiles for each step. After generating
    multiple forecasting predictions through a bootstrapping process, each
    quantile is calculated for each step.

    Args:
        steps: Number of steps to predict.
            - If steps is int, number of steps to predict.
            - If str or pandas Datetime, the prediction will be up to that date.
        last_window: Series values used to create the predictors (lags) needed in the
            first iteration of the prediction (t + 1).
            If `last_window = None`, the values stored in `self.last_window_` are
            used to calculate the initial predictors, and the predictions start
            right after training data.
        exog: Exogenous variable/s included as predictor/s.
        quantiles: Sequence of quantiles to compute, which must be between 0 and 1
            inclusive. For example, quantiles of 0.05, 0.5 and 0.95 should be as
            `quantiles = [0.05, 0.5, 0.95]`. Defaults to (0.05, 0.5, 0.95).
        n_boot: Number of bootstrapping iterations to perform when estimating quantiles.
        use_in_sample_residuals: If `True`, residuals from the training data are used as proxy of
            prediction error to create predictions.
            If `False`, out of sample residuals (calibration) are used.
            Out-of-sample residuals must be precomputed using Forecaster's
            `set_out_sample_residuals()` method.
        use_binned_residuals: If `True`, residuals are selected based on the predicted values
            (binned selection).
            If `False`, residuals are selected randomly.
        random_state: Seed for the random number generator to ensure reproducibility.

    Returns:
        Quantiles predicted by the forecaster, one column per quantile
        (named `q_{quantile}`).
    """
    # NOTE: the default `quantiles` is an immutable tuple (not a list) to avoid
    # the shared-mutable-default-argument pitfall; the body never mutates it.

    check_interval(quantiles=quantiles)

    boot_predictions = self.predict_bootstrapping(
        steps=steps,
        last_window=last_window,
        exog=exog,
        n_boot=n_boot,
        random_state=random_state,
        use_in_sample_residuals=use_in_sample_residuals,
        use_binned_residuals=use_binned_residuals,
    )

    # Each row of boot_predictions holds the n_boot simulated values of one
    # step; quantiles are computed across that axis.
    predictions = boot_predictions.quantile(q=quantiles, axis=1).transpose()
    predictions.columns = [f"q_{q}" for q in quantiles]

    return predictions

set_fit_kwargs(fit_kwargs)

Set new values for the additional keyword arguments passed to the fit method of the estimator.

Parameters:

Name Type Description Default
fit_kwargs dict[str, object]

Dict of the form {"argument": new_value}.

required
Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def set_fit_kwargs(self, fit_kwargs: dict[str, object]) -> None:
    """
    Set new values for the additional keyword arguments passed to the `fit`
    method of the estimator.

    Args:
        fit_kwargs: Dict of the form {"argument": new_value}.
    """
    # Keep only the keyword arguments accepted by the estimator's `fit`
    # method before storing them.
    validated_kwargs = check_select_fit_kwargs(
        self.estimator, fit_kwargs=fit_kwargs
    )
    self.fit_kwargs = validated_kwargs

set_in_sample_residuals(y, exog=None, random_state=123)

Set in-sample residuals in case they were not calculated during the training process.

In-sample residuals are calculated as the difference between the true values and the predictions made by the forecaster using the training data. The following internal attributes are updated:

  • in_sample_residuals_: residuals stored in a numpy ndarray.
  • binner_intervals_: intervals used to bin the residuals are calculated using the quantiles of the predicted values.
  • in_sample_residuals_by_bin_: residuals are binned according to the predicted value they are associated with and stored in a dictionary, where the keys are the intervals of the predicted values and the values are the residuals associated with that range.

A total of 10_000 residuals are stored in the attribute in_sample_residuals_. If the number of residuals is greater than 10_000, a random sample of 10_000 residuals is stored. The number of residuals stored per bin is limited to 10_000 // self.binner.n_bins_.

Parameters:

Name Type Description Default
y Series

Target time series.

required
exog Series | DataFrame | None

Exogenous variables. Defaults to None.

None
random_state int

Random state for reproducibility. Defaults to 123.

123

Returns:

Type Description
None

None

Raises:

Type Description
NotFittedError

If the forecaster is not fitted.

IndexError

If the index range of y does not match the range used during training.

ValueError

If the features generated from the provided data do not match those used during the training process.

Examples:

>>> from sklearn.linear_model import LinearRegression
>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
>>> forecaster.fit(y=pd.Series(np.arange(20)), store_in_sample_residuals=False)
>>> forecaster.set_in_sample_residuals(y=pd.Series(np.arange(20)))
>>> forecaster.in_sample_residuals_
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def set_in_sample_residuals(
    self,
    y: pd.Series,
    exog: pd.Series | pd.DataFrame | None = None,
    random_state: int = 123,
) -> None:
    """
    Set in-sample residuals in case they were not calculated during the
    training process.

    In-sample residuals are calculated as the difference between the true
    values and the predictions made by the forecaster using the training
    data. The following internal attributes are updated:

    + `in_sample_residuals_`: residuals stored in a numpy ndarray.
    + `binner_intervals_`: intervals used to bin the residuals are calculated
    using the quantiles of the predicted values.
    + `in_sample_residuals_by_bin_`: residuals are binned according to the
    predicted value they are associated with and stored in a dictionary, where
    the keys are the intervals of the predicted values and the values are
    the residuals associated with that range.

    A total of 10_000 residuals are stored in the attribute `in_sample_residuals_`.
    If the number of residuals is greater than 10_000, a random sample of
    10_000 residuals is stored. The number of residuals stored per bin is
    limited to `10_000 // self.binner.n_bins_`.

    Args:
        y: Target time series.
        exog: Exogenous variables.
        random_state: Random state for reproducibility.

    Returns:
        None

    Raises:
        NotFittedError: If the forecaster is not fitted.
        IndexError: If the index range of `y` does not match the range
            used during training.
        ValueError: If the features generated from the provided data do not
            match those used during the training process.

    Examples:
        >>> from sklearn.linear_model import LinearRegression
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
        >>> forecaster.fit(y=pd.Series(np.arange(20)), store_in_sample_residuals=False)
        >>> forecaster.set_in_sample_residuals(y=pd.Series(np.arange(20)))
        >>> forecaster.in_sample_residuals_
        array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
    """
    if not self.is_fitted:
        raise NotFittedError(
            "This forecaster is not fitted yet. Call `fit` with appropriate "
            "arguments before using `set_in_sample_residuals()`."
        )

    check_y(y=y)
    # First and last index values of `y`; these must match the exact range
    # seen at fit time so the residuals align with the training data.
    y_index_range = check_extract_values_and_index(
        data=y, data_label="`y`", return_values=False
    )[1][[0, -1]]

    if not y_index_range.equals(self.training_range_):
        raise IndexError(
            f"The index range of `y` does not match the range "
            f"used during training. Please ensure the index is aligned "
            f"with the training data.\n"
            f"    Expected : {self.training_range_}\n"
            f"    Received : {y_index_range}"
        )

    # Rebuild the training matrices; only the feature matrix, target and
    # output feature names are needed here.
    (
        X_train,
        y_train,
        _,
        _,
        _,
        X_train_features_names_out_,
        *_,
    ) = self._create_train_X_y(y=y, exog=exog)

    if not X_train_features_names_out_ == self.X_train_features_names_out_:
        raise ValueError(
            f"Feature mismatch detected after matrix creation. The features "
            f"generated from the provided data do not match those used during "
            f"the training process. To correctly set in-sample residuals, "
            f"ensure that the same data and preprocessing steps are applied.\n"
            f"    Expected output : {self.X_train_features_names_out_}\n"
            f"    Current output  : {X_train_features_names_out_}"
        )

    # Compute residuals from the rebuilt matrices and store them on the
    # forecaster, both unbinned and binned by predicted value.
    self._binning_in_sample_residuals(
        y_true=y_train.to_numpy(),
        y_pred=self.estimator.predict(X_train).ravel(),
        store_in_sample_residuals=True,
        random_state=random_state,
    )

set_lags(lags=None)

Set new value to the attribute lags. Attributes lags_names, max_lag and window_size are also updated.

Parameters:

Name Type Description Default
lags Union[int, List[int], ndarray, range, None]

Lags used as predictors. Index starts at 1, so lag 1 is equal to t-1. - int: include lags from 1 to lags (included). - list, 1d numpy ndarray or range: include only lags present in lags, all elements must be int. - None: no lags are included as predictors.

None
Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def set_lags(
    self, lags: Union[int, List[int], np.ndarray, range, None] = None
) -> None:
    """
    Update the attribute `lags` and the attributes derived from it
    (`lags_names`, `max_lag` and `window_size`).

    Args:
        lags: Lags used as predictors, where lag 1 corresponds to t-1.
            An `int` includes lags 1 through `lags` (inclusive); a `list`,
            1d numpy `ndarray` or `range` includes exactly the given lags
            (all elements must be int); `None` includes no lags as predictors.
    """

    # A forecaster with neither lags nor window features has no predictors.
    if lags is None and self.window_features is None:
        raise ValueError(
            "At least one of the arguments `lags` or `window_features` "
            "must be different from None. This is required to create the "
            "predictors used in training the forecaster."
        )

    self.lags, self.lags_names, self.max_lag = initialize_lags(
        type(self).__name__, lags
    )
    # Window size is the largest history needed by any predictor.
    candidate_sizes = (self.max_lag, self.max_size_window_features)
    self.window_size = max(ws for ws in candidate_sizes if ws is not None)
    if self.differentiation is not None:
        # Differencing consumes extra leading observations.
        self.window_size += self.differentiation
        self.differentiator.set_params(window_size=self.window_size)

set_out_sample_residuals(y_true, y_pred, append=False, random_state=123)

Set new values to the attribute out_sample_residuals_.

Out of sample residuals are meant to be calculated using observations that did not participate in the training process. y_true and y_pred are expected to be in the original scale of the time series. Residuals are calculated as y_true - y_pred, after applying the necessary transformations and differentiations if the forecaster includes them (self.transformer_y and self.differentiation). Two internal attributes are updated:

  • out_sample_residuals_: residuals stored in a numpy ndarray.
  • out_sample_residuals_by_bin_: residuals are binned according to the predicted value they are associated with and stored in a dictionary, where the keys are the intervals of the predicted values and the values are the residuals associated with that range. If a bin is empty, it is filled with a random sample of residuals from other bins. This is done to ensure that all bins have at least one residual and can be used in the prediction process.

A total of 10_000 residuals are stored in the attribute out_sample_residuals_. If the number of residuals is greater than 10_000, a random sample of 10_000 residuals is stored. The number of residuals stored per bin is limited to 10_000 // self.binner.n_bins_.

Parameters:

Name Type Description Default
y_true ndarray | Series

True values of the time series in the original scale.

required
y_pred ndarray | Series

Predicted values of the time series in the original scale.

required
append bool

If True, new residuals are added to the ones already stored in the forecaster. If after appending the new residuals, the limit of 10_000 // self.binner.n_bins_ values per bin is reached, a random sample of residuals is stored.

False
random_state int

Random state for reproducibility.

123

Returns:

Type Description
None

None

Raises:

Type Description
NotFittedError

If the forecaster is not fitted.

TypeError

If y_true or y_pred are not numpy ndarray or pandas Series.

ValueError

If y_true and y_pred have different length or index (if Series).

Examples:

>>> from sklearn.linear_model import LinearRegression
>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> import pandas as pd
>>> import numpy as np
>>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
>>> forecaster.fit(y=pd.Series(np.arange(20)), store_in_sample_residuals=False)
>>> y_true = np.array([20, 21, 22, 23, 24])
>>> y_pred = np.array([20.1, 20.9, 22.2, 22.8, 24.0])
>>> forecaster.set_out_sample_residuals(y_true=y_true, y_pred=y_pred)
>>> forecaster.out_sample_residuals_
array([-0.1,  0.1, -0.2,  0.2,  0. ])
Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def set_out_sample_residuals(
    self,
    y_true: np.ndarray | pd.Series,
    y_pred: np.ndarray | pd.Series,
    append: bool = False,
    random_state: int = 123,
) -> None:
    """
    Set new values to the attribute `out_sample_residuals_`.

    Out of sample residuals are meant to be calculated using observations that
    did not participate in the training process. `y_true` and `y_pred` are
    expected to be in the original scale of the time series. Residuals are
    calculated as `y_true` - `y_pred`, after applying the necessary
    transformations and differentiations if the forecaster includes them
    (`self.transformer_y` and `self.differentiation`). Two internal attributes
    are updated:

    + `out_sample_residuals_`: residuals stored in a numpy ndarray.
    + `out_sample_residuals_by_bin_`: residuals are binned according to the
    predicted value they are associated with and stored in a dictionary, where
    the keys are the intervals of the predicted values and the values are
    the residuals associated with that range. If a bin is empty, it is filled
    with a random sample of residuals from other bins. This is done to ensure
    that all bins have at least one residual and can be used in the prediction
    process.

    A total of 10_000 residuals are stored in the attribute `out_sample_residuals_`.
    If the number of residuals is greater than 10_000, a random sample of
    10_000 residuals is stored. The number of residuals stored per bin is
    limited to `10_000 // self.binner.n_bins_`.

    Args:
        y_true: True values of the time series in the original scale.
        y_pred: Predicted values of the time series in the original scale.
        append: If `True`, new residuals are added to the once already stored
            in the forecaster. If after appending the new residuals, the limit
            of `10_000 // self.binner.n_bins_` values per bin is reached, a
            random sample of residuals is stored.
        random_state: Random state for reproducibility.

    Returns:
        None

    Raises:
        NotFittedError: If the forecaster is not fitted.
        TypeError: If `y_true` or `y_pred` are not `numpy ndarray` or `pandas Series`.
        ValueError: If `y_true` and `y_pred` have different length or index (if Series).

    Examples:
        >>> from sklearn.linear_model import LinearRegression
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> import pandas as pd
        >>> import numpy as np
        >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
        >>> forecaster.fit(y=pd.Series(np.arange(20)), store_in_sample_residuals=False)
        >>> y_true = np.array([20, 21, 22, 23, 24])
        >>> y_pred = np.array([20.1, 20.9, 22.2, 22.8, 24.0])
        >>> forecaster.set_out_sample_residuals(y_true=y_true, y_pred=y_pred)
        >>> forecaster.out_sample_residuals_
        array([-0.1,  0.1, -0.2,  0.2,  0. ])
    """
    if not self.is_fitted:
        raise NotFittedError(
            "This forecaster is not fitted yet. Call `fit` with appropriate "
            "arguments before using `set_out_sample_residuals()`."
        )

    if not isinstance(y_true, (np.ndarray, pd.Series)):
        raise TypeError(
            f"`y_true` argument must be `numpy ndarray` or `pandas Series`. "
            f"Got {type(y_true)}."
        )

    if not isinstance(y_pred, (np.ndarray, pd.Series)):
        raise TypeError(
            f"`y_pred` argument must be `numpy ndarray` or `pandas Series`. "
            f"Got {type(y_pred)}."
        )

    if len(y_true) != len(y_pred):
        raise ValueError(
            f"`y_true` and `y_pred` must have the same length. "
            f"Got {len(y_true)} and {len(y_pred)}."
        )

    if isinstance(y_true, pd.Series) and isinstance(y_pred, pd.Series):
        if not y_true.index.equals(y_pred.index):
            raise ValueError("`y_true` and `y_pred` must have the same index.")

    if not isinstance(y_pred, np.ndarray):
        y_pred = y_pred.to_numpy()
    if not isinstance(y_true, np.ndarray):
        y_true = y_true.to_numpy()

    if self.transformer_y:
        y_true = transform_numpy(
            array=y_true,
            transformer=self.transformer_y,
            fit=False,
            inverse_transform=False,
        )
        y_pred = transform_numpy(
            array=y_pred,
            transformer=self.transformer_y,
            fit=False,
            inverse_transform=False,
        )

    if self.differentiation is not None:
        differentiator = copy(self.differentiator)
        differentiator.set_params(window_size=None)
        y_true = differentiator.fit_transform(y_true)[self.differentiation :]
        y_pred = differentiator.fit_transform(y_pred)[self.differentiation :]

    data = pd.DataFrame(
        {"prediction": y_pred, "residuals": y_true - y_pred}
    ).dropna()
    y_pred = data["prediction"].to_numpy()
    residuals = data["residuals"].to_numpy()

    if self.binner is not None:
        data["bin"] = self.binner.transform(y_pred).astype(int)
        residuals_by_bin = (
            data.groupby("bin")["residuals"].apply(np.array).to_dict()
        )
    else:
        residuals_by_bin = {}

    out_sample_residuals = (
        np.array([])
        if self.out_sample_residuals_ is None
        else self.out_sample_residuals_
    )
    out_sample_residuals_by_bin = (
        {}
        if self.out_sample_residuals_by_bin_ is None
        else self.out_sample_residuals_by_bin_
    )
    if append:
        out_sample_residuals = np.concatenate([out_sample_residuals, residuals])
        for k, v in residuals_by_bin.items():
            if k in out_sample_residuals_by_bin:
                out_sample_residuals_by_bin[k] = np.concatenate(
                    (out_sample_residuals_by_bin[k], v)
                )
            else:
                out_sample_residuals_by_bin[k] = v
    else:
        out_sample_residuals = residuals
        out_sample_residuals_by_bin = residuals_by_bin

    if self.binner is not None:
        max_samples = 10_000 // self.binner.n_bins
        rng = np.random.default_rng(seed=random_state)

        for k, v in out_sample_residuals_by_bin.items():
            if len(v) > max_samples:
                out_sample_residuals_by_bin[k] = rng.choice(
                    v, size=max_samples, replace=False
                )

        bin_keys = (
            [] if self.binner_intervals_ is None else self.binner_intervals_.keys()
        )
        empty_bins = [
            k
            for k in bin_keys
            if k not in out_sample_residuals_by_bin
            or len(out_sample_residuals_by_bin[k]) == 0
        ]

        if empty_bins:
            warnings.warn(
                f"The following bins have no out of sample residuals: {empty_bins}. "
                f"No predicted values fall in the interval "
                f"{[self.binner_intervals_[bin] for bin in empty_bins]}. "
                f"Empty bins will be filled with a random sample of residuals.",
                ResidualsUsageWarning,
            )
            empty_bin_size = min(max_samples, len(out_sample_residuals))
            for k in empty_bins:
                out_sample_residuals_by_bin[k] = rng.choice(
                    a=out_sample_residuals, size=empty_bin_size, replace=False
                )

    self.out_sample_residuals_ = out_sample_residuals
    self.out_sample_residuals_by_bin_ = out_sample_residuals_by_bin

set_params(params=None, **kwargs)

Set the parameters of this forecaster.

Parameters:

Name Type Description Default
params Dict[str, object]

Optional dictionary of parameter names mapped to their new values. If provided, these parameters are set first.

None
**kwargs object

Dictionary of parameter names mapped to their new values. Parameters can be for the forecaster itself or for the contained estimator (using the estimator__ prefix).

{}

Returns:

Name Type Description
self 'ForecasterRecursive'

The forecaster instance with updated parameters.

Examples:

>>> from sklearn.linear_model import LinearRegression
>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
>>> forecaster.set_params(estimator__fit_intercept=False)
>>> forecaster.estimator.get_params()["fit_intercept"]
False
Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def set_params(
    self, params: Dict[str, object] = None, **kwargs: object
) -> "ForecasterRecursive":
    """
    Set the parameters of this forecaster.

    Args:
        params: Optional dictionary of parameter names mapped to their new values.
            If provided, these parameters are set first.
        **kwargs: Dictionary of parameter names mapped to their new values.
            Parameters can be for the forecaster itself or for the contained estimator (using the `estimator__` prefix).

    Returns:
        self: The forecaster instance with updated parameters.

    Examples:
        >>> from sklearn.linear_model import LinearRegression
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
        >>> forecaster.set_params(estimator__fit_intercept=False)
        >>> forecaster.estimator.get_params()["fit_intercept"]
        False
    """

    # Merge the optional `params` dict with keyword arguments; kwargs win
    # on key collisions because they are applied last.
    all_params = {} if params is None else dict(params)
    all_params.update(kwargs)

    if not all_params:
        return self

    # Keys with a `__` separator address a nested object (e.g. the estimator
    # via `estimator__alpha`); plain keys are set on the forecaster itself.
    # Note: parameter names are intentionally not validated against
    # `get_params()` here (relaxed check); unknown plain keys become new
    # attributes on the forecaster.
    nested_params = {}
    for key, value in all_params.items():
        if "__" in key:
            obj_name, param_name = key.split("__", 1)
            nested_params.setdefault(obj_name, {})[param_name] = value
        else:
            setattr(self, key, value)

    for obj_name, obj_params in nested_params.items():
        if hasattr(self, obj_name):
            obj = getattr(self, obj_name)
            if hasattr(obj, "set_params"):
                # Delegate to the nested object's own set_params when available.
                obj.set_params(**obj_params)
            else:
                for param_name, value in obj_params.items():
                    setattr(obj, param_name, value)

    return self

set_window_features(window_features=None)

Set new value to the attribute window_features.

Attributes max_size_window_features, window_features_names, window_features_class_names and window_size are also updated.

Parameters:

Name Type Description Default
window_features object | list[object] | None

Instance or list of instances used to create window features. Window features are created from the original time series and are included as predictors.

None

Returns:

Type Description
None

None

Examples:

>>> from sklearn.linear_model import LinearRegression
>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> from spotforecast2_safe.preprocessing import RollingFeatures
>>> import pandas as pd
>>> import numpy as np
>>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
>>> rolling = RollingFeatures(stats=['mean', 'std'], window_sizes=[3, 5])
>>> forecaster.set_window_features(window_features=rolling)
>>> forecaster.window_features_names
['roll_mean_3', 'roll_std_3', 'roll_mean_5', 'roll_std_5']
>>> forecaster.window_size
5
Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def set_window_features(
    self, window_features: object | list[object] | None = None
) -> None:
    """
    Set new value to the attribute `window_features`.

    Attributes `max_size_window_features`, `window_features_names`,
    `window_features_class_names` and `window_size` are also updated.

    Args:
        window_features: Instance or list of instances used to create window features.
            Window features are created from the original time series and are
            included as predictors.

    Returns:
        None

    Examples:
        >>> from sklearn.linear_model import LinearRegression
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> from spotforecast2_safe.preprocessing import RollingFeatures
        >>> import pandas as pd
        >>> import numpy as np
        >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
        >>> rolling = RollingFeatures(stats=['mean', 'std'], window_sizes=[3, 5])
        >>> forecaster.set_window_features(window_features=rolling)
        >>> forecaster.window_features_names
        ['roll_mean_3', 'roll_std_3', 'roll_mean_5', 'roll_std_5']
        >>> forecaster.window_size
        5
    """

    if window_features is None and self.lags is None:
        raise ValueError(
            "At least one of the arguments `lags` or `window_features` "
            "must be different from None. This is required to create the "
            "predictors used in training the forecaster."
        )

    (
        self.window_features,
        self.window_features_names,
        self.max_size_window_features,
    ) = initialize_window_features(window_features)
    self.window_features_class_names = None
    if window_features is not None:
        self.window_features_class_names = [
            type(wf).__name__ for wf in self.window_features
        ]
    self.window_size = max(
        [
            ws
            for ws in [self.max_lag, self.max_size_window_features]
            if ws is not None
        ]
    )
    if self.differentiation is not None:
        self.window_size += self.differentiation
        self.differentiator.set_params(window_size=self.window_size)

Base Forecaster

base

spotforecast2_safe.forecaster.base

ForecasterBase class.

This module contains the base class for all forecasters in spotforecast2_safe and spotforecast. All forecasters should specify all the parameters that can be set at the class level in their init.

Examples:

Create a custom forecaster inheriting from ForecasterBase:

>>> from spotforecast2_safe.forecaster.base import ForecasterBase
>>> import pandas as pd
>>> import numpy as np
>>> class MyForecaster(ForecasterBase):
...     def __init__(self, estimator):
...         self.estimator = estimator
...         self.__spotforecast_tags__ = {'hide_lags': True}
...     def create_train_X_y(self, y, exog=None):
...         return pd.DataFrame(), pd.Series(dtype=float)
...     def fit(self, y, exog=None):
...         pass
...     def predict(self, steps, last_window=None, exog=None):
...         return pd.Series(np.zeros(steps))
...     def set_params(self, params):
...         pass
>>> from sklearn.linear_model import Ridge
>>> forecaster = MyForecaster(estimator=Ridge())
>>> forecaster
MyForecaster(estimator=Ridge())

ForecasterBase

Bases: ABC

Base class for all forecasters in spotforecast2.

All forecasters should specify all the parameters that can be set at the class level in their init.

Attributes:

Name Type Description
__spotforecast_tags__

Dictionary with forecaster tags that characterize the behavior of the forecaster.

Examples:

To see all abstract methods that need to be implemented:

>>> import inspect
>>> from spotforecast2_safe.forecaster.base import ForecasterBase
>>> [m[0] for m in inspect.getmembers(ForecasterBase, predicate=inspect.isabstract)]
['create_train_X_y', 'fit', 'predict', 'set_params']
Source code in src/spotforecast2_safe/forecaster/base.py
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
class ForecasterBase(ABC):
    """Base class for all forecasters in spotforecast2.

    All forecasters should specify all the parameters that can be set at
    the class level in their __init__.

    Attributes:
        __spotforecast_tags__: Dictionary with forecaster tags that characterize
            the behavior of the forecaster.

    Examples:
        To see all abstract methods that need to be implemented:

        >>> import inspect
        >>> from spotforecast2_safe.forecaster.base import ForecasterBase
        >>> [m[0] for m in inspect.getmembers(ForecasterBase, predicate=inspect.isabstract)]
        ['create_train_X_y', 'fit', 'predict', 'set_params']
    """

    def _preprocess_repr(
        self,
        estimator: object | None = None,
        training_range_: dict[str, str] | None = None,
        series_names_in_: list[str] | None = None,
        exog_names_in_: list[str] | None = None,
        transformer_series: object | dict[str, object] | None = None,
    ) -> tuple[str, str | None, str | None, str | None, str | None]:
        """Prepare the information to be displayed when a Forecaster object is printed.

        Args:
            estimator: Estimator object. Default is None.
            training_range_: Training range. Only used for ForecasterRecursiveMultiSeries.
                Default is None.
            series_names_in_: Names of the series used in the forecaster.
                Only used for ForecasterRecursiveMultiSeries. Default is None.
            exog_names_in_: Names of the exogenous variables used in the forecaster.
                Default is None.
            transformer_series: Transformer used in the series.
                Only used for ForecasterRecursiveMultiSeries. Default is None.

        Returns:
            Tuple containing params (estimator parameters string), training_range_
            (training range string representation), series_names_in_ (series names
            string representation), exog_names_in_ (exogenous variable names string
            representation), and transformer_series (transformer string representation).

        Examples:
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> from sklearn.linear_model import Ridge
            >>> estimator = Ridge(alpha=0.5)
            >>> forecaster = ForecasterRecursive(estimator=estimator, lags=3)
            >>> params, tr, sn, en, ts = forecaster._preprocess_repr(estimator=estimator)
            >>> params
            "{'alpha': 0.5, 'copy_X': True, 'fit_intercept': True, 'max_iter': None, 'positive': False, 'random_state': None, 'solver': 'auto', 'tol': 0.0001}"
        """

        if estimator is not None:
            if isinstance(estimator, Pipeline):
                name_pipe_steps = tuple(
                    name + "__" for name in estimator.named_steps.keys()
                )
                params = {
                    key: value
                    for key, value in estimator.get_params().items()
                    if key.startswith(name_pipe_steps)
                }
            else:
                params = estimator.get_params()
            params = str(params)
        else:
            params = None

        if training_range_ is not None:
            training_range_ = [
                f"'{k}': {v.astype(str).to_list()}" for k, v in training_range_.items()
            ]
            if len(training_range_) > 10:
                training_range_ = training_range_[:5] + ["..."] + training_range_[-5:]
            training_range_ = ", ".join(training_range_)

        if series_names_in_ is not None:
            if len(series_names_in_) > 50:
                series_names_in_ = (
                    series_names_in_[:25] + ["..."] + series_names_in_[-25:]
                )
            series_names_in_ = ", ".join(series_names_in_)

        if exog_names_in_ is not None:
            if len(exog_names_in_) > 50:
                exog_names_in_ = exog_names_in_[:25] + ["..."] + exog_names_in_[-25:]
            exog_names_in_ = ", ".join(exog_names_in_)

        if transformer_series is not None:
            if isinstance(transformer_series, dict):
                transformer_series = [
                    f"'{k}': {v}" for k, v in transformer_series.items()
                ]
                if len(transformer_series) > 10:
                    transformer_series = (
                        transformer_series[:5] + ["..."] + transformer_series[-5:]
                    )
                transformer_series = ", ".join(transformer_series)
            else:
                transformer_series = str(transformer_series)

        return (
            params,
            training_range_,
            series_names_in_,
            exog_names_in_,
            transformer_series,
        )

    def _format_text_repr(
        self,
        text: str,
        max_text_length: int = 58,
        width: int = 80,
        indent: str = "    ",
    ) -> str:
        """Format text for __repr__ method.

        Args:
            text: Text to format.
            max_text_length: Maximum length of the text before wrapping. Default is 58.
            width: Maximum width of the text. Default is 80.
            indent: Indentation of the text. Default is four spaces.

        Returns:
            Formatted text string with proper wrapping and indentation.

        Examples:
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> from sklearn.linear_model import Ridge
            >>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
            >>> forecaster._format_text_repr("Short text")
            'Short text'
        """

        if text is not None and len(text) > max_text_length:
            text = "\n    " + textwrap.fill(
                str(text), width=width, subsequent_indent=indent
            )

        return text

    @abstractmethod
    def create_train_X_y(
        self, y: pd.Series, exog: pd.Series | pd.DataFrame | None = None
    ) -> tuple[pd.DataFrame, pd.Series]:
        """Create training matrices from univariate time series and exogenous variables.

        Abstract method: concrete forecasters must provide the implementation.

        Args:
            y: Training time series.
            exog: Exogenous variable(s) included as predictor(s). Must have the same
                number of observations as y and their indexes must be aligned.
                Default is None.

        Returns:
            Tuple containing X_train (training values/predictors with shape
            (len(y) - max_lag, len(lags))) and y_train (target values of the
            time series related to each row of X_train with shape (len(y) - max_lag,)).

        Examples:
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> from sklearn.linear_model import Ridge
            >>> import pandas as pd
            >>> import numpy as np
            >>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
            >>> y = pd.Series(np.arange(10), name='y')
            >>> X_train, y_train = forecaster.create_train_X_y(y)
            >>> X_train.head(2)
               lag_1  lag_2  lag_3
            3    2.0    1.0    0.0
            4    3.0    2.0    1.0
            >>> y_train.head(2)
            3    3
            4    4
            Name: y, dtype: int64
        """

        pass

    @abstractmethod
    def fit(self, y: pd.Series, exog: pd.Series | pd.DataFrame | None = None) -> None:
        """Training Forecaster.

        Abstract method: concrete forecasters must provide the implementation.

        Args:
            y: Training time series.
            exog: Exogenous variable(s) included as predictor(s). Must have the same
                number of observations as y and their indexes must be aligned so
                that y[i] is regressed on exog[i]. Default is None.

        Returns:
            None

        Examples:
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> from sklearn.linear_model import Ridge
            >>> import pandas as pd
            >>> import numpy as np
            >>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
            >>> y = pd.Series(np.arange(10), name='y')
            >>> forecaster.fit(y)
            >>> forecaster.is_fitted
            True
        """

        pass

    @abstractmethod
    def predict(
        self,
        steps: int,
        last_window: pd.Series | pd.DataFrame | None = None,
        exog: pd.Series | pd.DataFrame | None = None,
    ) -> pd.Series:
        """Predict n steps ahead.

        Abstract method: concrete forecasters must provide the implementation.

        Args:
            steps: Number of steps to predict.
            last_window: Series values used to create the predictors (lags) needed in the
                first iteration of the prediction (t + 1). If None, the values stored in
                last_window are used to calculate the initial predictors, and the
                predictions start right after training data. Default is None.
            exog: Exogenous variable(s) included as predictor(s). Default is None.

        Returns:
            Predicted values as a pandas Series.

        Examples:
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> from sklearn.linear_model import Ridge
            >>> import pandas as pd
            >>> import numpy as np
            >>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
            >>> y = pd.Series(np.arange(10), name='y')
            >>> forecaster.fit(y)
            >>> forecaster.predict(steps=3)
            10    9.5
            11    9.0
            12    8.5
            Name: pred, dtype: float64
        """

        pass

    @abstractmethod
    def set_params(self, params: dict[str, object]) -> None:
        """Set new values to the parameters of the scikit-learn model stored in the forecaster.

        Abstract method: concrete forecasters must provide the implementation.

        Args:
            params: Parameters values dictionary.

        Returns:
            None

        Examples:
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> from sklearn.linear_model import Ridge
            >>> forecaster = ForecasterRecursive(estimator=Ridge(alpha=1.0), lags=3)
            >>> forecaster.set_params({'estimator__alpha': 0.5})
            >>> forecaster.estimator.alpha
            0.5
        """

        pass

    def set_lags(
        self, lags: int | list[int] | np.ndarray[int] | range[int] | None = None
    ) -> None:
        """Replace the forecaster's lag configuration.

        Refreshing the lags also updates the derived attributes max_lag and
        window_size.

        Args:
            lags: Lags used as predictors, with lag 1 corresponding to t-1.
                An int selects every lag from 1 up to and including that value.
                A list, 1d numpy ndarray, or range selects exactly the listed
                lags; all elements must be int. None means no lags are used
                as predictors. Default is None.

        Returns:
            None

        Examples:
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> from sklearn.linear_model import Ridge
            >>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
            >>> forecaster.set_lags(lags=5)
            >>> forecaster.lags
            array([1, 2, 3, 4, 5])
        """

        pass

    def set_window_features(
        self, window_features: object | list[object] | None = None
    ) -> None:
        """Set new value to the attribute window_features.

        Attributes max_size_window_features, window_features_names,
        window_features_class_names and window_size are also updated.

        Args:
            window_features: Instance or list of instances used to create window features.
                Window features are created from the original time series and are
                included as predictors. Default is None.

        Returns:
            None

        Examples:
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> from spotforecast2_safe.forecaster.preprocessing import RollingFeatures
            >>> from sklearn.linear_model import Ridge
            >>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
            >>> window_feat = RollingFeatures(stats='mean', window_sizes=3)
            >>> forecaster.set_window_features(window_features=window_feat)
            >>> forecaster.window_features
            [RollingFeatures(stats=['mean'], window_sizes=[3])]
        """

        pass

    def get_tags(self) -> dict[str, Any]:
        """Expose the tags that describe this forecaster's behavior.

        Returns:
            Dictionary of tags characterizing the forecaster's behavior and
            capabilities.

        Examples:
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> from sklearn.linear_model import Ridge
            >>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
            >>> tags = forecaster.get_tags()
            >>> tags['forecaster_task']
            'regression'
        """

        tags = self.__spotforecast_tags__
        return tags

    def summary(self) -> None:
        """Print a human-readable description of the forecaster.

        Returns:
            None

        Examples:
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> from sklearn.linear_model import Ridge
            >>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
            >>> forecaster.summary()
            ForecasterRecursive
            ===================
            Estimator: Ridge()
            Lags: [1 2 3]
            ...
        """

        print(repr(self))

    def __setstate__(self, state: dict) -> None:
        """Restore state on unpickling, migrating deprecated attribute names.

        Called when an object is deserialized. If the pickled state carries the
        legacy ``regressor`` key and no ``estimator`` key, the value is moved to
        ``estimator`` before the state is applied.

        Args:
            state: The state dictionary from the pickled object.

        Returns:
            None

        Examples:
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> from sklearn.linear_model import Ridge
            >>> import pickle
            >>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
            >>> pickled_forecaster = pickle.dumps(forecaster)
            >>> unpickled_forecaster = pickle.loads(pickled_forecaster)
        """

        legacy_key = "regressor"
        if legacy_key in state and "estimator" not in state:
            state["estimator"] = state.pop(legacy_key)

        self.__dict__.update(state)

    @property
    def regressor(self) -> Any:
        """Deprecated property. Use estimator instead.

        Emits a FutureWarning attributed to the caller's line (stacklevel=2)
        and returns the estimator stored in the forecaster.

        Returns:
            The estimator object.

        Examples:
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> from sklearn.linear_model import Ridge
            >>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
            >>> forecaster.regressor # Raises FutureWarning
            Ridge()
        """
        warnings.warn(
            "The `regressor` attribute is deprecated and will be removed in future "
            "versions. Use `estimator` instead.",
            FutureWarning,
            stacklevel=2,  # attribute the warning to the caller, not this property
        )
        return self.estimator

regressor property

Deprecated property. Use estimator instead.

Returns:

Type Description
Any

The estimator object.

Examples:

>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> from sklearn.linear_model import Ridge
>>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
>>> forecaster.regressor # Raises FutureWarning
Ridge()

__setstate__(state)

Custom `__setstate__` to ensure backward compatibility when unpickling.

This method is called when an object is unpickled (deserialized). It handles the migration of deprecated attributes to their new names.

Parameters:

Name Type Description Default
state dict

The state dictionary from the pickled object.

required

Returns:

Type Description
None

None

Examples:

>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> from sklearn.linear_model import Ridge
>>> import pickle
>>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
>>> pickled_forecaster = pickle.dumps(forecaster)
>>> unpickled_forecaster = pickle.loads(pickled_forecaster)
Source code in src/spotforecast2_safe/forecaster/base.py
def __setstate__(self, state: dict) -> None:
    """Custom __setstate__ to ensure backward compatibility when unpickling.

    This method is called when an object is unpickled (deserialized).
    It handles the migration of deprecated attributes to their new names.

    Args:
        state: The state dictionary from the pickled object.

    Returns:
        None

    Examples:
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> from sklearn.linear_model import Ridge
        >>> import pickle
        >>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
        >>> pickled_forecaster = pickle.dumps(forecaster)
        >>> unpickled_forecaster = pickle.loads(pickled_forecaster)
    """

    if "regressor" in state and "estimator" not in state:
        state["estimator"] = state.pop("regressor")

    self.__dict__.update(state)

create_train_X_y(y, exog=None) abstractmethod

Create training matrices from univariate time series and exogenous variables.

Parameters:

Name Type Description Default
y Series

Training time series.

required
exog Series | DataFrame | None

Exogenous variable(s) included as predictor(s). Must have the same number of observations as y and their indexes must be aligned. Default is None.

None

Returns:

Type Description
DataFrame

Tuple containing X_train (training values/predictors with shape

Series

(len(y) - max_lag, len(lags))) and y_train (target values of the

tuple[DataFrame, Series]

time series related to each row of X_train with shape (len(y) - max_lag,)).

Examples:

>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> from sklearn.linear_model import Ridge
>>> import pandas as pd
>>> import numpy as np
>>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
>>> y = pd.Series(np.arange(10), name='y')
>>> X_train, y_train = forecaster.create_train_X_y(y)
>>> X_train.head(2)
   lag_1  lag_2  lag_3
3    2.0    1.0    0.0
4    3.0    2.0    1.0
>>> y_train.head(2)
3    3
4    4
Name: y, dtype: int64
Source code in src/spotforecast2_safe/forecaster/base.py
@abstractmethod
def create_train_X_y(
    self, y: pd.Series, exog: pd.Series | pd.DataFrame | None = None
) -> tuple[pd.DataFrame, pd.Series]:
    """Create training matrices from univariate time series and exogenous variables.

    Args:
        y: Training time series.
        exog: Exogenous variable(s) included as predictor(s). Must have the same
            number of observations as y and their indexes must be aligned.
            Default is None.

    Returns:
        Tuple containing X_train (training values/predictors with shape
        (len(y) - max_lag, len(lags))) and y_train (target values of the
        time series related to each row of X_train with shape (len(y) - max_lag,)).

    Examples:
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> from sklearn.linear_model import Ridge
        >>> import pandas as pd
        >>> import numpy as np
        >>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
        >>> y = pd.Series(np.arange(10), name='y')
        >>> X_train, y_train = forecaster.create_train_X_y(y)
        >>> X_train.head(2)
           lag_1  lag_2  lag_3
        3    2.0    1.0    0.0
        4    3.0    2.0    1.0
        >>> y_train.head(2)
        3    3
        4    4
        Name: y, dtype: int64
    """

    pass

fit(y, exog=None) abstractmethod

Training Forecaster.

Parameters:

Name Type Description Default
y Series

Training time series.

required
exog Series | DataFrame | None

Exogenous variable(s) included as predictor(s). Must have the same number of observations as y and their indexes must be aligned so that y[i] is regressed on exog[i]. Default is None.

None

Returns:

Type Description
None

None

Examples:

>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> from sklearn.linear_model import Ridge
>>> import pandas as pd
>>> import numpy as np
>>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
>>> y = pd.Series(np.arange(10), name='y')
>>> forecaster.fit(y)
>>> forecaster.is_fitted
True
Source code in src/spotforecast2_safe/forecaster/base.py
@abstractmethod
def fit(self, y: pd.Series, exog: pd.Series | pd.DataFrame | None = None) -> None:
    """Training Forecaster.

    Args:
        y: Training time series.
        exog: Exogenous variable(s) included as predictor(s). Must have the same
            number of observations as y and their indexes must be aligned so
            that y[i] is regressed on exog[i]. Default is None.

    Returns:
        None

    Examples:
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> from sklearn.linear_model import Ridge
        >>> import pandas as pd
        >>> import numpy as np
        >>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
        >>> y = pd.Series(np.arange(10), name='y')
        >>> forecaster.fit(y)
        >>> forecaster.is_fitted
        True
    """

    pass

get_tags()

Return the tags that characterize the behavior of the forecaster.

Returns:

Type Description
dict[str, Any]

Dictionary with forecaster tags describing behavior and capabilities.

Examples:

>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> from sklearn.linear_model import Ridge
>>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
>>> tags = forecaster.get_tags()
>>> tags['forecaster_task']
'regression'
Source code in src/spotforecast2_safe/forecaster/base.py
def get_tags(self) -> dict[str, Any]:
    """Return the tags that characterize the behavior of the forecaster.

    Returns:
        Dictionary with forecaster tags describing behavior and capabilities.

    Examples:
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> from sklearn.linear_model import Ridge
        >>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
        >>> tags = forecaster.get_tags()
        >>> tags['forecaster_task']
        'regression'
    """

    return self.__spotforecast_tags__

predict(steps, last_window=None, exog=None) abstractmethod

Predict n steps ahead.

Parameters:

Name Type Description Default
steps int

Number of steps to predict.

required
last_window Series | DataFrame | None

Series values used to create the predictors (lags) needed in the first iteration of the prediction (t + 1). If None, the values stored in last_window are used to calculate the initial predictors, and the predictions start right after training data. Default is None.

None
exog Series | DataFrame | None

Exogenous variable(s) included as predictor(s). Default is None.

None

Returns:

Type Description
Series

Predicted values as a pandas Series.

Examples:

>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> from sklearn.linear_model import Ridge
>>> import pandas as pd
>>> import numpy as np
>>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
>>> y = pd.Series(np.arange(10), name='y')
>>> forecaster.fit(y)
>>> forecaster.predict(steps=3)
10    9.5
11    9.0
12    8.5
Name: pred, dtype: float64
Source code in src/spotforecast2_safe/forecaster/base.py
@abstractmethod
def predict(
    self,
    steps: int,
    last_window: pd.Series | pd.DataFrame | None = None,
    exog: pd.Series | pd.DataFrame | None = None,
) -> pd.Series:
    """Predict n steps ahead.

    Args:
        steps: Number of steps to predict.
        last_window: Series values used to create the predictors (lags) needed in the
            first iteration of the prediction (t + 1). If None, the values stored in
            last_window are used to calculate the initial predictors, and the
            predictions start right after training data. Default is None.
        exog: Exogenous variable(s) included as predictor(s). Default is None.

    Returns:
        Predicted values as a pandas Series.

    Examples:
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> from sklearn.linear_model import Ridge
        >>> import pandas as pd
        >>> import numpy as np
        >>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
        >>> y = pd.Series(np.arange(10), name='y')
        >>> forecaster.fit(y)
        >>> forecaster.predict(steps=3)
        10    9.5
        11    9.0
        12    8.5
        Name: pred, dtype: float64
    """

    pass

set_lags(lags=None)

Set new value to the attribute lags.

Attributes max_lag and window_size are also updated.

Parameters:

Name Type Description Default
lags int | list[int] | ndarray[int] | range[int] | None

Lags used as predictors. Index starts at 1, so lag 1 is equal to t-1. If int: include lags from 1 to lags (included). If list, 1d numpy ndarray, or range: include only lags present in lags, all elements must be int. If None: no lags are included as predictors. Default is None.

None

Returns:

Type Description
None

None

Examples:

>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> from sklearn.linear_model import Ridge
>>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
>>> forecaster.set_lags(lags=5)
>>> forecaster.lags
array([1, 2, 3, 4, 5])
Source code in src/spotforecast2_safe/forecaster/base.py
def set_lags(
    self, lags: int | list[int] | np.ndarray[int] | range[int] | None = None
) -> None:
    """Set new value to the attribute lags.

    Attributes max_lag and window_size are also updated.

    Args:
        lags: Lags used as predictors. Index starts at 1, so lag 1 is equal to t-1.
            If int: include lags from 1 to lags (included). If list, 1d numpy ndarray,
            or range: include only lags present in lags, all elements must be int.
            If None: no lags are included as predictors. Default is None.

    Returns:
        None

    Examples:
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> from sklearn.linear_model import Ridge
        >>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
        >>> forecaster.set_lags(lags=5)
        >>> forecaster.lags
        array([1, 2, 3, 4, 5])
    """

    pass

set_params(params) abstractmethod

Set new values to the parameters of the scikit-learn model stored in the forecaster.

Parameters:

Name Type Description Default
params dict[str, object]

Parameters values dictionary.

required

Returns:

Type Description
None

None

Examples:

>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> from sklearn.linear_model import Ridge
>>> forecaster = ForecasterRecursive(estimator=Ridge(alpha=1.0), lags=3)
>>> forecaster.set_params({'estimator__alpha': 0.5})
>>> forecaster.estimator.alpha
0.5
Source code in src/spotforecast2_safe/forecaster/base.py
@abstractmethod
def set_params(self, params: dict[str, object]) -> None:
    """Set new values to the parameters of the scikit-learn model stored in the forecaster.

    Args:
        params: Parameters values dictionary.

    Returns:
        None

    Examples:
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> from sklearn.linear_model import Ridge
        >>> forecaster = ForecasterRecursive(estimator=Ridge(alpha=1.0), lags=3)
        >>> forecaster.set_params({'estimator__alpha': 0.5})
        >>> forecaster.estimator.alpha
        0.5
    """

    pass

set_window_features(window_features=None)

Set new value to the attribute window_features.

Attributes max_size_window_features, window_features_names, window_features_class_names and window_size are also updated.

Parameters:

Name Type Description Default
window_features object | list[object] | None

Instance or list of instances used to create window features. Window features are created from the original time series and are included as predictors. Default is None.

None

Returns:

Type Description
None

None

Examples:

>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> from spotforecast2_safe.forecaster.preprocessing import RollingFeatures
>>> from sklearn.linear_model import Ridge
>>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
>>> window_feat = RollingFeatures(stats='mean', window_sizes=3)
>>> forecaster.set_window_features(window_features=window_feat)
>>> forecaster.window_features
[RollingFeatures(stats=['mean'], window_sizes=[3])]
Source code in src/spotforecast2_safe/forecaster/base.py
def set_window_features(
    self, window_features: object | list[object] | None = None
) -> None:
    """Set new value to the attribute window_features.

    Attributes max_size_window_features, window_features_names,
    window_features_class_names and window_size are also updated.

    Args:
        window_features: Instance or list of instances used to create window features.
            Window features are created from the original time series and are
            included as predictors. Default is None.

    Returns:
        None

    Examples:
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> from spotforecast2_safe.forecaster.preprocessing import RollingFeatures
        >>> from sklearn.linear_model import Ridge
        >>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
        >>> window_feat = RollingFeatures(stats='mean', window_sizes=3)
        >>> forecaster.set_window_features(window_features=window_feat)
        >>> forecaster.window_features
        [RollingFeatures(stats=['mean'], window_sizes=[3])]
    """

    pass

summary()

Show forecaster information.

Returns:

Type Description
None

None

Examples:

>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> from sklearn.linear_model import Ridge
>>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
>>> forecaster.summary()
ForecasterRecursive
===================
Estimator: Ridge()
Lags: [1 2 3]
...
Source code in src/spotforecast2_safe/forecaster/base.py
def summary(self) -> None:
    """Show forecaster information.

    Returns:
        None

    Examples:
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> from sklearn.linear_model import Ridge
        >>> forecaster = ForecasterRecursive(estimator=Ridge(), lags=3)
        >>> forecaster.summary()
        ForecasterRecursive
        ===================
        Estimator: Ridge()
        Lags: [1 2 3]
        ...
    """

    print(self.__repr__())

Recursive Forecasting

recursive

spotforecast2_safe.forecaster.recursive

ForecasterEquivalentDate

This forecaster predicts future values based on the most recent equivalent date. It also allows to aggregate multiple past values of the equivalent date using a function (e.g. mean, median, max, min, etc.). The equivalent date is calculated by moving back in time a specified number of steps (offset). The offset can be defined as an integer or as a pandas DateOffset. This approach is useful as a baseline, but it is a simplistic method and may not capture complex underlying patterns.

Parameters:

Name Type Description Default
offset (int, DateOffset)

Number of steps to go back in time to find the most recent equivalent date to the target period. If offset is an integer, it represents the number of steps to go back in time. For example, if the frequency of the time series is daily, offset = 7 means that the most recent data similar to the target period is the value observed 7 days ago. Pandas DateOffsets can also be used to move back a given number of valid dates. For example, Bday(2) can be used to move back two business days. If the date does not start on a valid date, it is first moved to a valid date. For example, if the date is a Saturday, it is moved to the previous Friday. Then, the offset is applied. If the result is a non-valid date, it is moved to the next valid date. For example, if the date is a Sunday, it is moved to the next Monday. For more information about offsets, see https://pandas.pydata.org/docs/reference/offset_frequency.html.

required
n_offsets int

Number of equivalent dates (multiple of offset) used in the prediction. Defaults to 1. If n_offsets is greater than 1, the values at the equivalent dates are aggregated using the agg_func function. For example, if the frequency of the time series is daily, offset = 7, n_offsets = 2 and agg_func = np.mean, the predicted value will be the mean of the values observed 7 and 14 days ago.

1
agg_func Callable

Function used to aggregate the values of the equivalent dates when the number of equivalent dates (n_offsets) is greater than 1. Defaults to np.mean.

mean
binner_kwargs dict

Additional arguments to pass to the QuantileBinner used to discretize the residuals into k bins according to the predicted values associated with each residual. Available arguments are: n_bins, method, subsample, random_state and dtype. Argument method is passed internally to the function numpy.percentile. Defaults to None.

None
forecaster_id (str, int)

Name used as an identifier of the forecaster. Defaults to None.

None

Attributes:

Name Type Description
offset (int, DateOffset)

Number of steps to go back in time to find the most recent equivalent date to the target period.

n_offsets int

Number of equivalent dates (multiple of offset) used in the prediction.

agg_func Callable

Function used to aggregate the values of the equivalent dates when the number of equivalent dates (n_offsets) is greater than 1.

window_size int

Number of past values needed to include the last equivalent dates according to the offset and n_offsets.

last_window_ pandas Series

This window represents the most recent data observed by the predictor during its training phase. It contains the past values needed to include the last equivalent date according to the offset and n_offsets.

index_type_ type

Type of index of the input used in training.

index_freq_ str

Frequency of Index of the input used in training.

training_range_ pandas Index

First and last values of index of the data used during training.

series_name_in_ str

Name of the series provided by the user during training.

in_sample_residuals_ numpy ndarray

Residuals of the model when predicting training data. Only stored up to 10_000 values.

in_sample_residuals_by_bin_ dict

In sample residuals binned according to the predicted value each residual is associated with. The number of residuals stored per bin is limited to 10_000 // self.binner.n_bins_ in the form {bin: residuals}.

out_sample_residuals_ numpy ndarray

Residuals of the model when predicting non-training data. Only stored up to 10_000 values. Use set_out_sample_residuals() method to set values.

out_sample_residuals_by_bin_ dict

Out of sample residuals binned according to the predicted value each residual is associated with. The number of residuals stored per bin is limited to 10_000 // self.binner.n_bins_ in the form {bin: residuals}.

binner QuantileBinner

QuantileBinner used to discretize residuals into k bins according to the predicted values associated with each residual.

binner_intervals_ dict

Intervals used to discretize residuals into k bins according to the predicted values associated with each residual.

binner_kwargs dict

Additional arguments to pass to the QuantileBinner.

creation_date str

Date of creation.

is_fitted bool

Tag to identify if the estimator has been fitted (trained).

fit_date str

Date of last fit.

spotforecast_version str

Version of spotforecast library used to create the forecaster.

python_version str

Version of python used to create the forecaster.

forecaster_id (str, int)

Name used as an identifier of the forecaster.

estimator Ignored

Not used, present here for API consistency by convention.

differentiation Ignored

Not used, present here for API consistency by convention.

differentiation_max Ignored

Not used, present here for API consistency by convention.

Examples:

>>> import pandas as pd
>>> import numpy as np
>>> from spotforecast2_safe.forecaster.recursive import ForecasterEquivalentDate
>>> # Series with daily frequency
>>> data = pd.Series(
...     data = np.arange(14),
...     index = pd.date_range(start='2022-01-01', periods=14, freq='D')
... )
>>> # Forecast based on the value 7 days ago
>>> forecaster = ForecasterEquivalentDate(offset=7)
>>> forecaster.fit(y=data)
>>> forecaster.predict(steps=3)
2022-01-15    7
2022-01-16    8
2022-01-17    9
Freq: D, Name: pred, dtype: int64
Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_equivalent_date.py
  32
  33
  34
  35
  36
  37
  38
  39
  40
  41
  42
  43
  44
  45
  46
  47
  48
  49
  50
  51
  52
  53
  54
  55
  56
  57
  58
  59
  60
  61
  62
  63
  64
  65
  66
  67
  68
  69
  70
  71
  72
  73
  74
  75
  76
  77
  78
  79
  80
  81
  82
  83
  84
  85
  86
  87
  88
  89
  90
  91
  92
  93
  94
  95
  96
  97
  98
  99
 100
 101
 102
 103
 104
 105
 106
 107
 108
 109
 110
 111
 112
 113
 114
 115
 116
 117
 118
 119
 120
 121
 122
 123
 124
 125
 126
 127
 128
 129
 130
 131
 132
 133
 134
 135
 136
 137
 138
 139
 140
 141
 142
 143
 144
 145
 146
 147
 148
 149
 150
 151
 152
 153
 154
 155
 156
 157
 158
 159
 160
 161
 162
 163
 164
 165
 166
 167
 168
 169
 170
 171
 172
 173
 174
 175
 176
 177
 178
 179
 180
 181
 182
 183
 184
 185
 186
 187
 188
 189
 190
 191
 192
 193
 194
 195
 196
 197
 198
 199
 200
 201
 202
 203
 204
 205
 206
 207
 208
 209
 210
 211
 212
 213
 214
 215
 216
 217
 218
 219
 220
 221
 222
 223
 224
 225
 226
 227
 228
 229
 230
 231
 232
 233
 234
 235
 236
 237
 238
 239
 240
 241
 242
 243
 244
 245
 246
 247
 248
 249
 250
 251
 252
 253
 254
 255
 256
 257
 258
 259
 260
 261
 262
 263
 264
 265
 266
 267
 268
 269
 270
 271
 272
 273
 274
 275
 276
 277
 278
 279
 280
 281
 282
 283
 284
 285
 286
 287
 288
 289
 290
 291
 292
 293
 294
 295
 296
 297
 298
 299
 300
 301
 302
 303
 304
 305
 306
 307
 308
 309
 310
 311
 312
 313
 314
 315
 316
 317
 318
 319
 320
 321
 322
 323
 324
 325
 326
 327
 328
 329
 330
 331
 332
 333
 334
 335
 336
 337
 338
 339
 340
 341
 342
 343
 344
 345
 346
 347
 348
 349
 350
 351
 352
 353
 354
 355
 356
 357
 358
 359
 360
 361
 362
 363
 364
 365
 366
 367
 368
 369
 370
 371
 372
 373
 374
 375
 376
 377
 378
 379
 380
 381
 382
 383
 384
 385
 386
 387
 388
 389
 390
 391
 392
 393
 394
 395
 396
 397
 398
 399
 400
 401
 402
 403
 404
 405
 406
 407
 408
 409
 410
 411
 412
 413
 414
 415
 416
 417
 418
 419
 420
 421
 422
 423
 424
 425
 426
 427
 428
 429
 430
 431
 432
 433
 434
 435
 436
 437
 438
 439
 440
 441
 442
 443
 444
 445
 446
 447
 448
 449
 450
 451
 452
 453
 454
 455
 456
 457
 458
 459
 460
 461
 462
 463
 464
 465
 466
 467
 468
 469
 470
 471
 472
 473
 474
 475
 476
 477
 478
 479
 480
 481
 482
 483
 484
 485
 486
 487
 488
 489
 490
 491
 492
 493
 494
 495
 496
 497
 498
 499
 500
 501
 502
 503
 504
 505
 506
 507
 508
 509
 510
 511
 512
 513
 514
 515
 516
 517
 518
 519
 520
 521
 522
 523
 524
 525
 526
 527
 528
 529
 530
 531
 532
 533
 534
 535
 536
 537
 538
 539
 540
 541
 542
 543
 544
 545
 546
 547
 548
 549
 550
 551
 552
 553
 554
 555
 556
 557
 558
 559
 560
 561
 562
 563
 564
 565
 566
 567
 568
 569
 570
 571
 572
 573
 574
 575
 576
 577
 578
 579
 580
 581
 582
 583
 584
 585
 586
 587
 588
 589
 590
 591
 592
 593
 594
 595
 596
 597
 598
 599
 600
 601
 602
 603
 604
 605
 606
 607
 608
 609
 610
 611
 612
 613
 614
 615
 616
 617
 618
 619
 620
 621
 622
 623
 624
 625
 626
 627
 628
 629
 630
 631
 632
 633
 634
 635
 636
 637
 638
 639
 640
 641
 642
 643
 644
 645
 646
 647
 648
 649
 650
 651
 652
 653
 654
 655
 656
 657
 658
 659
 660
 661
 662
 663
 664
 665
 666
 667
 668
 669
 670
 671
 672
 673
 674
 675
 676
 677
 678
 679
 680
 681
 682
 683
 684
 685
 686
 687
 688
 689
 690
 691
 692
 693
 694
 695
 696
 697
 698
 699
 700
 701
 702
 703
 704
 705
 706
 707
 708
 709
 710
 711
 712
 713
 714
 715
 716
 717
 718
 719
 720
 721
 722
 723
 724
 725
 726
 727
 728
 729
 730
 731
 732
 733
 734
 735
 736
 737
 738
 739
 740
 741
 742
 743
 744
 745
 746
 747
 748
 749
 750
 751
 752
 753
 754
 755
 756
 757
 758
 759
 760
 761
 762
 763
 764
 765
 766
 767
 768
 769
 770
 771
 772
 773
 774
 775
 776
 777
 778
 779
 780
 781
 782
 783
 784
 785
 786
 787
 788
 789
 790
 791
 792
 793
 794
 795
 796
 797
 798
 799
 800
 801
 802
 803
 804
 805
 806
 807
 808
 809
 810
 811
 812
 813
 814
 815
 816
 817
 818
 819
 820
 821
 822
 823
 824
 825
 826
 827
 828
 829
 830
 831
 832
 833
 834
 835
 836
 837
 838
 839
 840
 841
 842
 843
 844
 845
 846
 847
 848
 849
 850
 851
 852
 853
 854
 855
 856
 857
 858
 859
 860
 861
 862
 863
 864
 865
 866
 867
 868
 869
 870
 871
 872
 873
 874
 875
 876
 877
 878
 879
 880
 881
 882
 883
 884
 885
 886
 887
 888
 889
 890
 891
 892
 893
 894
 895
 896
 897
 898
 899
 900
 901
 902
 903
 904
 905
 906
 907
 908
 909
 910
 911
 912
 913
 914
 915
 916
 917
 918
 919
 920
 921
 922
 923
 924
 925
 926
 927
 928
 929
 930
 931
 932
 933
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
class ForecasterEquivalentDate:
    """
    This forecaster predicts future values based on the most recent equivalent
    date. It also allows to aggregate multiple past values of the equivalent
    date using a function (e.g. mean, median, max, min, etc.). The equivalent
    date is calculated by moving back in time a specified number of steps (offset).
    The offset can be defined as an integer or as a pandas DateOffset. This
    approach is useful as a baseline, but it is a simplistic method and may not
    capture complex underlying patterns.

    Args:
        offset (int, pandas.tseries.offsets.DateOffset): Number of steps to go back
            in time to find the most recent equivalent date to the target period.
            If `offset` is an integer, it represents the number of steps to go back
            in time. For example, if the frequency of the time series is daily,
            `offset = 7` means that the most recent data similar to the target
            period is the value observed 7 days ago.
            Pandas DateOffsets can also be used to move back a given number of
            valid dates. For example, Bday(2) can be used to move back two business
            days. If the date does not start on a valid date, it is first moved to a
            valid date. For example, if the date is a Saturday, it is moved to the
            previous Friday. Then, the offset is applied. If the result is a non-valid
            date, it is moved to the next valid date. For example, if the date
            is a Sunday, it is moved to the next Monday.
            For more information about offsets, see
            https://pandas.pydata.org/docs/reference/offset_frequency.html.
        n_offsets (int, optional): Number of equivalent dates (multiple of offset)
            used in the prediction. Defaults to 1.
            If `n_offsets` is greater than 1, the values at the equivalent dates are
            aggregated using the `agg_func` function. For example, if the frequency
            of the time series is daily, `offset = 7`, `n_offsets = 2` and
            `agg_func = np.mean`, the predicted value will be the mean of the values
            observed 7 and 14 days ago.
        agg_func (Callable, optional): Function used to aggregate the values of the
            equivalent dates when the number of equivalent dates (`n_offsets`) is
            greater than 1. Defaults to np.mean.
        binner_kwargs (dict, optional): Additional arguments to pass to the
            `QuantileBinner` used to discretize the residuals into k bins according
            to the predicted values associated with each residual. Available arguments
            are: `n_bins`, `method`, `subsample`, `random_state` and `dtype`.
            Argument `method` is passed internally to the function `numpy.percentile`.
            Defaults to None.
        forecaster_id (str, int, optional): Name used as an identifier of the
            forecaster. Defaults to None.

    Attributes:
        offset (int, pandas.tseries.offsets.DateOffset): Number of steps to go back
            in time to find the most recent equivalent date to the target period.
        n_offsets (int): Number of equivalent dates (multiple of offset) used in
            the prediction.
        agg_func (Callable): Function used to aggregate the values of the equivalent
            dates when the number of equivalent dates (`n_offsets`) is greater than 1.
        window_size (int): Number of past values needed to include the last
            equivalent dates according to the `offset` and `n_offsets`.
        last_window_ (pandas Series): This window represents the most recent data
            observed by the predictor during its training phase. It contains the
            past values needed to include the last equivalent date according to the
            `offset` and `n_offsets`.
        index_type_ (type): Type of index of the input used in training.
        index_freq_ (str): Frequency of Index of the input used in training.
        training_range_ (pandas Index): First and last values of index of the data
            used during training.
        series_name_in_ (str): Names of the series provided by the user during training.
        in_sample_residuals_ (numpy ndarray): Residuals of the model when predicting
            training data. Only stored up to 10_000 values.
        in_sample_residuals_by_bin_ (dict): In sample residuals binned according to
            the predicted value each residual is associated with. The number of
            residuals stored per bin is limited to `10_000 // self.binner.n_bins_`
            in the form `{bin: residuals}`.
        out_sample_residuals_ (numpy ndarray): Residuals of the model when predicting
            non-training data. Only stored up to 10_000 values. Use
            `set_out_sample_residuals()` method to set values.
        out_sample_residuals_by_bin_ (dict): Out of sample residuals binned
            according to the predicted value each residual is associated with.
            The number of residuals stored per bin is limited to
            `10_000 // self.binner.n_bins_` in the form `{bin: residuals}`.
        binner (spotforecast.preprocessing.QuantileBinner): `QuantileBinner` used to
            discretize residuals into k bins according to the predicted values
            associated with each residual.
        binner_intervals_ (dict): Intervals used to discretize residuals into k bins
            according to the predicted values associated with each residual.
        binner_kwargs (dict): Additional arguments to pass to the `QuantileBinner`.
        creation_date (str): Date of creation.
        is_fitted (bool): Tag to identify if the estimator has been fitted (trained).
        fit_date (str): Date of last fit.
        spotforecast_version (str): Version of spotforecast library used to create
            the forecaster.
        python_version (str): Version of python used to create the forecaster.
        forecaster_id (str, int): Name used as an identifier of the forecaster.
        estimator (Ignored): Not used, present here for API consistency by convention.
        differentiation (Ignored): Not used, present here for API consistency by convention.
        differentiation_max (Ignored): Not used, present here for API consistency by convention.

    Examples:
        >>> import pandas as pd
        >>> import numpy as np
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterEquivalentDate
        >>> # Series with daily frequency
        >>> data = pd.Series(
        ...     data = np.arange(14),
        ...     index = pd.date_range(start='2022-01-01', periods=14, freq='D')
        ... )
        >>> # Forecast based on the value 7 days ago
        >>> forecaster = ForecasterEquivalentDate(offset=7)
        >>> forecaster.fit(y=data)
        >>> forecaster.predict(steps=3)
        2022-01-15    7
        2022-01-16    8
        2022-01-17    9
        Freq: D, Name: pred, dtype: int64
    """

    def __init__(
        self,
        offset: int | pd.tseries.offsets.DateOffset,
        n_offsets: int = 1,
        agg_func: Callable = np.mean,
        binner_kwargs: dict[str, object] | None = None,
        forecaster_id: str | int | None = None,
    ) -> None:
        """
        Initialize the forecaster.

        See the class docstring for a detailed description of every argument.

        Raises:
            TypeError: If `offset` is neither a positive integer nor a pandas
                DateOffset. The error message always promised "greater than 0";
                the value is now actually enforced for integer offsets, since a
                non-positive offset would yield an empty or inverted window.
        """

        # Validate before touching any state so an invalid forecaster is
        # never partially constructed. The exception type and message are
        # unchanged so existing callers that catch TypeError still work.
        if not isinstance(offset, (int, pd.tseries.offsets.DateOffset)) or (
            isinstance(offset, int) and offset < 1
        ):
            raise TypeError(
                "`offset` must be an integer greater than 0 or a "
                "pandas.tseries.offsets. Find more information about offsets in "
                "https://pandas.pydata.org/docs/reference/offset_frequency.html"
            )

        self.offset = offset
        self.n_offsets = n_offsets
        self.agg_func = agg_func
        # Past observations needed to reach the oldest equivalent date.
        # For a DateOffset this is an offset object here and is recomputed
        # in steps inside `fit()` once the training index is known.
        self.window_size = self.offset * self.n_offsets

        # State populated by fit(); None until the forecaster is trained.
        self.last_window_ = None
        self.index_type_ = None
        self.index_freq_ = None
        self.training_range_ = None
        self.series_name_in_ = None
        self.in_sample_residuals_ = None
        self.out_sample_residuals_ = None
        self.in_sample_residuals_by_bin_ = None
        self.out_sample_residuals_by_bin_ = None

        self.creation_date = pd.Timestamp.today().strftime("%Y-%m-%d %H:%M:%S")
        self.is_fitted = False
        self.fit_date = None
        try:
            from spotforecast2_safe import __version__

            self.spotforecast_version = __version__
        except ImportError:
            self.spotforecast_version = "unknown"
        self.python_version = sys.version.split(" ")[0]
        self.forecaster_id = forecaster_id
        self._probabilistic_mode = "binned"

        # Present only for API consistency with other forecasters.
        self.estimator = None
        self.differentiation = None
        self.differentiation_max = None

        # Residual binner used to discretize residuals by predicted value.
        self.binner_kwargs = binner_kwargs
        if binner_kwargs is None:
            self.binner_kwargs = {
                "n_bins": 10,
                "method": "linear",
                "subsample": 200000,
                "random_state": 789654,
                "dtype": np.float64,
            }
        self.binner = QuantileBinner(**self.binner_kwargs)
        self.binner_intervals_ = None

        self.__spotforecast_tags__ = {
            "library": "spotforecast",
            "forecaster_name": "ForecasterEquivalentDate",
            "forecaster_task": "regression",
            "forecasting_scope": "single-series",  # single-series | global
            "forecasting_strategy": "recursive",  # recursive | direct | deep_learning
            "index_types_supported": ["pandas.RangeIndex", "pandas.DatetimeIndex"],
            "requires_index_frequency": True,
            "allowed_input_types_series": ["pandas.Series"],
            "supports_exog": False,
            "allowed_input_types_exog": [],
            "handles_missing_values_series": False,
            "handles_missing_values_exog": False,
            "supports_lags": False,
            "supports_window_features": False,
            "supports_transformer_series": False,
            "supports_transformer_exog": False,
            "supports_weight_func": False,
            "supports_differentiation": False,
            "prediction_types": ["point", "interval"],
            "supports_probabilistic": True,
            "probabilistic_methods": ["conformal"],
            "handles_binned_residuals": True,
        }

    def __repr__(self) -> str:
        """
        Information displayed when a Forecaster object is printed.

        Returns:
            str: Information about the forecaster. It contains the following information:
            - Offset: Value of the `offset` argument.
            - Number of offsets: Value of the `n_offsets` argument.
            - Aggregation function: Name of the `agg_func` function.
            - Window size: Value of the `window_size` attribute.
            - Series name: Name of the series provided by the user during training.
            - Training range: First and last values of index of the data used during training.
            - Training index type: Type of index of the data used during training.
            - Training index frequency: Frequency of index of the data used during training.
            - Creation date: Date of creation of the forecaster object.
            - Last fit date: Date of last fit of the forecaster object.
            - spotforecast version: Version of spotforecast library used to create the forecaster.
            - Python version: Version of python used to create the forecaster.
            - Forecaster id: Name used as an identifier of the forecaster.


        Examples:
            >>> import pandas as pd
            >>> import numpy as np
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterEquivalentDate
            >>> data = pd.Series(
            ...     data = np.arange(14),
            ...     index = pd.date_range(start='2022-01-01', periods=14, freq='D')
            ... )
            >>> forecaster = ForecasterEquivalentDate(offset=7)
            >>> forecaster.fit(y=data)
            >>> print(forecaster)
            ============================
            ForecasterEquivalentDate
            ============================
            Offset: 7
            Number of offsets: 1
            Aggregation function: mean
            Window size: 7
            Series name: y
            Training range: [Timestamp('2022-01-01 00:00:00'), Timestamp('2022-01-14 00:00:00')]
            Training index type: DatetimeIndex
            Training index frequency: D
            Creation date: 2023-11-19 12:00:00
            Last fit date: 2023-11-19 12:00:00
            spotforecast version: 1.0.0
            Python version: 3.8.10
            Forecaster id: None

        """

        info = (
            f"{'=' * len(type(self).__name__)} \n"
            f"{type(self).__name__} \n"
            f"{'=' * len(type(self).__name__)} \n"
            f"Offset: {self.offset} \n"
            f"Number of offsets: {self.n_offsets} \n"
            f"Aggregation function: {self.agg_func.__name__} \n"
            f"Window size: {self.window_size} \n"
            f"Series name: {self.series_name_in_} \n"
            f"Training range: {self.training_range_.to_list() if self.is_fitted else None} \n"
            f"Training index type: {str(self.index_type_).split('.')[-1][:-2] if self.is_fitted else None} \n"
            f"Training index frequency: {self.index_freq_ if self.is_fitted else None} \n"
            f"Creation date: {self.creation_date} \n"
            f"Last fit date: {self.fit_date} \n"
            f"spotforecast version: {self.spotforecast_version} \n"
            f"Python version: {self.python_version} \n"
            f"Forecaster id: {self.forecaster_id} \n"
        )

        return info

    def _repr_html_(self) -> str:
        """
        HTML representation of the object.
        The "General Information" section is expanded by default.

        Returns:
            str: HTML representation of the forecaster object. It contains the same
            information as the `__repr__` method, but in a more structured way.
            In detail, it contains the following information:
            - Offset: Value of the `offset` argument.
            - Number of offsets: Value of the `n_offsets` argument.
            - Aggregation function: Name of the `agg_func` function.
            - Window size: Value of the `window_size` attribute.
            - Series name: Name of the series provided by the user during training.
            - Training range: First and last values of index of the data used during training.
            - Training index type: Type of index of the data used during training.
            - Training index frequency: Frequency of index of the data used during training.
            - Creation date: Date of creation of the forecaster object.
            - Last fit date: Date of last fit of the forecaster object.
            - spotforecast version: Version of spotforecast library used to create the forecaster.
            - Python version: Version of python used to create the forecaster.
            - Forecaster id: Name used as an identifier of the forecaster.

        Examples:
            >>> import pandas as pd
            >>> import numpy as np
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterEquivalentDate
            >>> data = pd.Series(
            ...     data = np.arange(14),
            ...     index = pd.date_range(start='2022-01-01', periods=14, freq='D')
            ... )
            >>> forecaster = ForecasterEquivalentDate(offset=7)
            >>> forecaster.fit(y=data)
            >>> forecaster._repr_html_()  # doctest: +ELLIPSIS
            '<style>...</style><div class="container-...">...</div>'

        """

        # `get_style_repr_html` returns a CSS <style> block plus a unique id
        # used to scope the styles to this rendering only.
        style, unique_id = get_style_repr_html(self.is_fitted)

        # NOTE(review): `self.estimator` is always None for this forecaster
        # (set in __init__ for API consistency), so this row renders as
        # "Estimator: NoneType" — confirm this is intended.
        content = f"""
        <div class="container-{unique_id}">
            <p style="font-size: 1.5em; font-weight: bold; margin-block-start: 0.83em; margin-block-end: 0.83em;">{type(self).__name__}</p>
            <details open>
                <summary>General Information</summary>
                <ul>
                    <li><strong>Estimator:</strong> {type(self.estimator).__name__}</li>
                    <li><strong>Offset:</strong> {self.offset}</li>
                    <li><strong>Number of offsets:</strong> {self.n_offsets}</li>
                    <li><strong>Aggregation function:</strong> {self.agg_func.__name__}</li>
                    <li><strong>Window size:</strong> {self.window_size}</li>
                    <li><strong>Creation date:</strong> {self.creation_date}</li>
                    <li><strong>Last fit date:</strong> {self.fit_date}</li>
                    <li><strong>spotforecast version:</strong> {self.spotforecast_version}</li>
                    <li><strong>Python version:</strong> {self.python_version}</li>
                    <li><strong>Forecaster id:</strong> {self.forecaster_id}</li>
                </ul>
            </details>
            <details>
                <summary>Training Information</summary>
                <ul>
                    <li><strong>Training range:</strong> {self.training_range_.to_list() if self.is_fitted else 'Not fitted'}</li>
                    <li><strong>Training index type:</strong> {str(self.index_type_).split('.')[-1][:-2] if self.is_fitted else 'Not fitted'}</li>
                    <li><strong>Training index frequency:</strong> {self.index_freq_ if self.is_fitted else 'Not fitted'}</li>
                </ul>
            </details>
        </div>
        """

        return (style + content).strip()

    def fit(
        self,
        y: pd.Series,
        store_in_sample_residuals: bool = False,
        random_state: int = 123,
        exog: Any = None,
    ) -> None:
        """
        Training Forecaster.

        Args:
            y (pandas Series): Training time series.
            store_in_sample_residuals (bool, optional): If `True`, in-sample
                residuals will be stored in the forecaster object after fitting
                (`in_sample_residuals_` and `in_sample_residuals_by_bin_` attributes).
                If `False`, only the intervals of the bins are stored. Defaults to False.
            random_state (int, optional): Set a seed for the random generator so
                that the stored sample residuals are always deterministic. Defaults to 123.
            exog (Ignored): Not used, present here for API consistency by convention.

        Returns:
            None

        Raises:
            TypeError: If `y` is not a pandas Series, or if `offset` is a pandas
                DateOffset and the index of `y` is not a DatetimeIndex with a set
                (or inferable) frequency.
            ValueError: If `y` is too short for the configured `offset` and
                `n_offsets`.

        Examples:
            >>> import pandas as pd
            >>> import numpy as np
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterEquivalentDate
            >>> data = pd.Series(
            ...     data = np.arange(14),
            ...     index = pd.date_range(start='2022-01-01', periods=14, freq='D')
            ... )
            >>> forecaster = ForecasterEquivalentDate(offset=7)
            >>> forecaster.fit(y=data)
        """

        if not isinstance(y, pd.Series):
            raise TypeError(
                f"`y` must be a pandas Series with a DatetimeIndex or a RangeIndex. "
                f"Found {type(y)}."
            )

        # A DateOffset can only be applied to a DatetimeIndex with a known
        # frequency; try to infer the frequency when it is missing.
        if isinstance(self.offset, pd.tseries.offsets.DateOffset):
            if not isinstance(y.index, pd.DatetimeIndex):
                raise TypeError(
                    "If `offset` is a pandas DateOffset, the index of `y` must be a "
                    "pandas DatetimeIndex with frequency."
                )
            elif y.index.freq is None:
                try:
                    # NOTE(review): this assigns the inferred freq onto the
                    # caller's index in place (side effect on the user's
                    # Series object) — confirm this mutation is intended.
                    y.index.freq = pd.infer_freq(y.index)
                except (ValueError, TypeError):
                    raise TypeError(
                        "If `offset` is a pandas DateOffset, the index of `y` must be a "
                        "pandas DatetimeIndex with frequency."
                    )
                # infer_freq can also return None without raising.
                if y.index.freq is None:
                    raise TypeError(
                        "If `offset` is a pandas DateOffset, the index of `y` must be a "
                        "pandas DatetimeIndex with frequency."
                    )

        # Reset values in case the forecaster has already been fitted.
        self.last_window_ = None
        self.index_type_ = None
        self.index_freq_ = None
        self.training_range_ = None
        self.series_name_in_ = None
        self.is_fitted = False

        # Validates `y` and returns its index; values are not needed here.
        _, y_index = check_extract_values_and_index(
            data=y, data_label="`y`", return_values=False
        )

        if isinstance(self.offset, pd.tseries.offsets.DateOffset):
            # Calculate the window_size in steps for compatibility with the
            # check_predict_input function. This is not a exact calculation
            # because the offset follows the calendar rules and the distance
            # between two dates may not be constant.
            first_valid_index = y_index[-1] - self.offset * self.n_offsets

            try:
                window_size_idx_start = y_index.get_loc(first_valid_index)
                window_size_idx_end = y_index.get_loc(y_index[-1])
                self.window_size = window_size_idx_end - window_size_idx_start
            except KeyError:
                # first_valid_index falls before the start of `y`, i.e. the
                # offset reaches further back than the available data.
                raise ValueError(
                    f"The length of `y` ({len(y)}), must be greater than or equal "
                    f"to the window size ({self.window_size}). This is because  "
                    f"the offset ({self.offset}) is larger than the available "
                    f"data. Try to decrease the size of the offset ({self.offset}), "
                    f"the number of `n_offsets` ({self.n_offsets}) or increase the "
                    f"size of `y`."
                )
        else:
            # Integer offset: window_size was computed in __init__ as
            # offset * n_offsets; `y` must strictly exceed it.
            if len(y) <= self.window_size:
                raise ValueError(
                    f"Length of `y` must be greater than the maximum window size "
                    f"needed by the forecaster. This is because  "
                    f"the offset ({self.offset}) is larger than the available "
                    f"data. Try to decrease the size of the offset ({self.offset}), "
                    f"the number of `n_offsets` ({self.n_offsets}) or increase the "
                    f"size of `y`.\n"
                    f"    Length `y`: {len(y)}.\n"
                    f"    Max window size: {self.window_size}.\n"
                )

        # Record training metadata used by __repr__ and check_predict_input.
        self.is_fitted = True
        self.series_name_in_ = y.name if y.name is not None else "y"
        self.fit_date = pd.Timestamp.today().strftime("%Y-%m-%d %H:%M:%S")
        self.training_range_ = y_index[[0, -1]]
        self.index_type_ = type(y_index)
        self.index_freq_ = (
            y_index.freqstr if isinstance(y_index, pd.DatetimeIndex) else y_index.step
        )

        # NOTE: This is done to save time during fit in functions such as backtesting()
        if self._probabilistic_mode is not False:
            self._binning_in_sample_residuals(
                y=y,
                store_in_sample_residuals=store_in_sample_residuals,
                random_state=random_state,
            )

        # The last time window of training data is stored so that equivalent
        # dates are available when calling the `predict` method.
        # Store the whole series to avoid errors when the offset is larger
        # than the data available.
        self.last_window_ = y.copy()

    def _binning_in_sample_residuals(
        self,
        y: pd.Series,
        store_in_sample_residuals: bool = False,
        random_state: int = 123,
    ) -> None:
        """
        Bin residuals according to the predicted value each residual is
        associated with. First a `spotforecast.preprocessing.QuantileBinner` object
        is fitted to the predicted values. Then, residuals are binned according
        to the predicted value each residual is associated with. Residuals are
        stored in the forecaster object as `in_sample_residuals_` and
        `in_sample_residuals_by_bin_`.

        The number of residuals stored per bin is limited to
        `10_000 // self.binner.n_bins_`. The total number of residuals stored is
        `10_000`.

        Args:
            y (pandas Series): Training time series.
            store_in_sample_residuals (bool, optional): If `True`, in-sample
                residuals will be stored in the forecaster object after fitting
                (`in_sample_residuals_` and `in_sample_residuals_by_bin_` attributes).
                If `False`, only the intervals of the bins are stored. Defaults to False.
            random_state (int, optional): Set a seed for the random generator so
                that the stored sample residuals are always deterministic. Defaults to 123.

        Returns:
            None

        Examples:
            >>> import pandas as pd
            >>> import numpy as np
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterEquivalentDate
            >>> data = pd.Series(
            ...     data = np.arange(21, dtype=float),
            ...     index = pd.date_range(start='2022-01-01', periods=21, freq='D')
            ... )
            >>> forecaster = ForecasterEquivalentDate(
            ...     offset=7,
            ...     binner_kwargs={'n_bins': 2, 'random_state': 123}
            ... )
            >>> forecaster.fit(y=data, store_in_sample_residuals=True)
            >>> forecaster.in_sample_residuals_
            array([7., 7., 7., 7., 7., 7., 7., 7., 7., 7., 7., 7., 7., 7.])
            >>> forecaster.in_sample_residuals_by_bin_
            {0: array([7., 7., 7., 7., 7., 7., 7.]), 1: array([7., 7., 7., 7., 7., 7., 7.])}


        """

        # Build the in-sample "predictions": for each training date, the
        # value(s) observed 1..n_offsets equivalent dates earlier.
        if isinstance(self.offset, pd.tseries.offsets.DateOffset):
            y_preds = []
            for n_off in range(1, self.n_offsets + 1):
                # Dates shifted n_off offsets into the past; keep only those
                # that do not fall before the start of `y`.
                idx = y.index - self.offset * n_off
                mask = idx >= y.index[0]
                # NOTE(review): assumes every surviving shifted date exists
                # as a label in y.index (pd.Series.loc raises KeyError on
                # missing labels) — confirm for irregular calendars.
                y_pred = y.loc[idx[mask]]
                # Re-align the shifted values onto the tail of the original index.
                y_pred.index = y.index[-mask.sum() :]
                y_preds.append(y_pred)

            y_preds = pd.concat(y_preds, axis=1).to_numpy()
            y_true = y.to_numpy()[-len(y_preds) :]

        else:
            # Integer offset: simple positional shift; the first window_size
            # observations have no equivalent date and are dropped.
            y_preds = [
                y.shift(self.offset * n_off)[self.window_size :]
                for n_off in range(1, self.n_offsets + 1)
            ]
            y_preds = np.column_stack(y_preds)
            y_true = y.to_numpy()[self.window_size :]

        # Aggregate across the n_offsets equivalent dates (row-wise).
        y_pred = np.apply_along_axis(self.agg_func, axis=1, arr=y_preds)

        residuals = y_true - y_pred

        if self._probabilistic_mode == "binned":
            # Drop rows with NaN (e.g. from shifting) before fitting the binner.
            data = pd.DataFrame({"prediction": y_pred, "residuals": residuals}).dropna()
            y_pred = data["prediction"].to_numpy()
            residuals = data["residuals"].to_numpy()

            self.binner.fit(y_pred)
            self.binner_intervals_ = self.binner.intervals_

        if store_in_sample_residuals:
            rng = np.random.default_rng(seed=random_state)
            if self._probabilistic_mode == "binned":
                data["bin"] = self.binner.transform(y_pred).astype(int)
                self.in_sample_residuals_by_bin_ = (
                    data.groupby("bin")["residuals"].apply(np.array).to_dict()
                )

                # Cap residuals per bin so the total stays around 10_000.
                max_sample = 10_000 // self.binner.n_bins_
                for k, v in self.in_sample_residuals_by_bin_.items():
                    if len(v) > max_sample:
                        sample = v[rng.integers(low=0, high=len(v), size=max_sample)]
                        self.in_sample_residuals_by_bin_[k] = sample

                # Make sure every binner interval has an entry, even if no
                # residual fell into it.
                for k in self.binner_intervals_.keys():
                    if k not in self.in_sample_residuals_by_bin_:
                        self.in_sample_residuals_by_bin_[k] = np.array([])

                # Empty bins are filled with a sample drawn (without
                # replacement) from the pooled residuals.
                empty_bins = [
                    k
                    for k, v in self.in_sample_residuals_by_bin_.items()
                    if v.size == 0
                ]
                if empty_bins:
                    empty_bin_size = min(max_sample, len(residuals))
                    for k in empty_bins:
                        self.in_sample_residuals_by_bin_[k] = rng.choice(
                            a=residuals, size=empty_bin_size, replace=False
                        )

            # Global (un-binned) residuals are capped at 10_000 values.
            if len(residuals) > 10_000:
                residuals = residuals[
                    rng.integers(low=0, high=len(residuals), size=10_000)
                ]

            self.in_sample_residuals_ = residuals

    def predict(
        self,
        steps: int,
        last_window: pd.Series | None = None,
        check_inputs: bool = True,
        exog: Any = None,
    ) -> pd.Series:
        """
        Predict n steps ahead.

        Args:
            steps (int): Number of steps to predict.
            last_window (pandas Series, optional): Past values needed to select the
                last equivalent dates according to the offset. If `last_window = None`,
                the values stored in `self.last_window_` are used and the predictions
                start immediately after the training data. Defaults to None.
            check_inputs (bool, optional): If `True`, the input is checked for
                possible warnings and errors with the `check_predict_input` function.
                This argument is created for internal use and is not recommended to
                be changed. Defaults to True.
            exog (Ignored): Not used, present here for API consistency by convention.

        Returns:
            pd.Series: Predicted values.

        Raises:
            ValueError:
                If all equivalent values are missing when using a pandas DateOffset as offset.
                This can be caused by using an offset larger than the available data.
                To avoid this, try to decrease the size of the offset, the number of `n_offsets` or increase the size of `last_window`.
                In backtesting, this error may be caused by using an `initial_train_size` too small.
            Warning:
                If some equivalent values are missing when using a pandas DateOffset as offset.
                This can be caused by using an offset larger than the available data or by using an `initial_train_size` too small in backtesting.
                To avoid this, increase the `last_window` size or decrease the number of `n_offsets`.

        Examples:
            >>> import pandas as pd
            >>> import numpy as np
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterEquivalentDate
            >>> data = pd.Series(
            ...     data = np.arange(14),
            ...     index = pd.date_range(start='2022-01-01', periods=14, freq='D')
            ... )
            >>> forecaster = ForecasterEquivalentDate(offset=7)
            >>> forecaster.fit(y=data)
            >>> forecaster.predict(steps=3)
            2022-01-15    7
            2022-01-16    8
            2022-01-17    9
            Freq: D, Name: pred, dtype: int64
        """

        if last_window is None:
            last_window = self.last_window_

        if check_inputs:
            check_predict_input(
                forecaster_name=type(self).__name__,
                steps=steps,
                is_fitted=self.is_fitted,
                exog_in_=False,
                index_type_=self.index_type_,
                index_freq_=self.index_freq_,
                window_size=self.window_size,
                last_window=last_window,
            )

        prediction_index = expand_index(index=last_window.index, steps=steps)

        # Initialize to prevent use-before-initialization warnings
        predictions = None

        if isinstance(self.offset, int):

            last_window_values = last_window.to_numpy(copy=True).ravel()
            equivalent_indexes = np.tile(
                np.arange(-self.offset, 0), int(np.ceil(steps / self.offset))
            )
            equivalent_indexes = equivalent_indexes[:steps]

            if self.n_offsets == 1:
                equivalent_values = last_window_values[equivalent_indexes]
                predictions = equivalent_values.ravel()

            if self.n_offsets > 1:
                equivalent_indexes = [
                    equivalent_indexes - n * self.offset
                    for n in np.arange(self.n_offsets)
                ]
                equivalent_indexes = np.vstack(equivalent_indexes)
                equivalent_values = last_window_values[equivalent_indexes]
                predictions = np.apply_along_axis(
                    self.agg_func, axis=0, arr=equivalent_values
                )

            predictions = pd.Series(
                data=predictions, index=prediction_index, name="pred"
            )

        if isinstance(self.offset, pd.tseries.offsets.DateOffset):

            last_window = last_window.copy()
            max_allowed_date = last_window.index[-1]

            # For every date in prediction_index, calculate the n offsets
            offset_dates = []
            for date in prediction_index:
                selected_offsets = []
                while len(selected_offsets) < self.n_offsets:
                    offset_date = date - self.offset
                    if offset_date <= max_allowed_date:
                        selected_offsets.append(offset_date)
                    date = offset_date
                offset_dates.append(selected_offsets)

            offset_dates = np.array(offset_dates)

            # Select the values of the time series corresponding to the each
            # offset date. If the offset date is not in the time series, the
            # value is set to NaN.
            equivalent_values = (
                last_window.reindex(offset_dates.ravel())
                .to_numpy()
                .reshape(-1, self.n_offsets)
            )
            equivalent_values = pd.DataFrame(
                data=equivalent_values,
                index=prediction_index,
                columns=[f"offset_{i}" for i in range(self.n_offsets)],
            )

            # Error if all values are missing
            if equivalent_values.isnull().all().all():
                raise ValueError(
                    f"All equivalent values are missing. This is caused by using "
                    f"an offset ({self.offset}) larger than the available data. "
                    f"Try to decrease the size of the offset ({self.offset}), "
                    f"the number of `n_offsets` ({self.n_offsets}) or increase the "
                    f"size of `last_window`. In backtesting, this error may be "
                    f"caused by using an `initial_train_size` too small."
                )

            # Warning if equivalent values are missing
            incomplete_offsets = equivalent_values.isnull().any(axis=1)
            incomplete_offsets = incomplete_offsets[incomplete_offsets].index
            if not incomplete_offsets.empty:
                warnings.warn(
                    f"Steps: {incomplete_offsets.strftime('%Y-%m-%d').to_list()} "
                    f"are calculated with less than {self.n_offsets} `n_offsets`. "
                    f"To avoid this, increase the `last_window` size or decrease "
                    f"the number of `n_offsets`. The current configuration requires "
                    f"a total offset of {self.offset * self.n_offsets}.",
                    MissingValuesWarning,
                )

            aggregate_values = equivalent_values.apply(self.agg_func, axis=1)
            predictions = aggregate_values.rename("pred")

        return predictions

    def predict_interval(
        self,
        steps: int,
        last_window: pd.Series | None = None,
        method: str = "conformal",
        interval: float | list[float] | tuple[float, ...] = (5, 95),
        use_in_sample_residuals: bool = True,
        use_binned_residuals: bool = True,
        random_state: Any = None,
        exog: Any = None,
        n_boot: Any = None,
    ) -> pd.DataFrame:
        """
        Predict n steps ahead and estimate prediction intervals using conformal
        prediction method. Refer to the References section for additional
        details on this method.

        Args:
            steps (int): Number of steps to predict.
            last_window (pandas Series, optional): Past values needed to select the
                last equivalent dates according to the offset. If `last_window = None`,
                the values stored in `self.last_window_` are used and the predictions
                start immediately after the training data. Defaults to None.
            method (str, optional): Technique used to estimate prediction intervals.
                Available options:
                - 'conformal': Employs the conformal prediction split method for
                interval estimation [1]_. Defaults to 'conformal'.
            interval (float, list, tuple, optional): Confidence level of the
                prediction interval. Interpretation depends on the method used:
                - If `float`, represents the nominal (expected) coverage (between 0
                and 1). For instance, `interval=0.95` corresponds to `[2.5, 97.5]`
                percentiles.
                - If `list` or `tuple`, defines the exact percentiles to compute,
                which must be between 0 and 100 inclusive. For example, interval
                of 95% should be as `interval = [2.5, 97.5]`.
                - When using `method='conformal'`, the interval must be a float or
                a list/tuple defining a symmetric interval. Defaults to (5, 95).
            use_in_sample_residuals (bool, optional): If `True`, residuals from the
                training data are used as proxy of prediction error to create predictions.
                If `False`, out of sample residuals (calibration) are used.
                Out-of-sample residuals must be precomputed using Forecaster's
                `set_out_sample_residuals()` method. Defaults to True.
            use_binned_residuals (bool, optional): If `True`, residuals are selected
                based on the predicted values (binned selection).
                If `False`, residuals are selected randomly. Defaults to True.
            random_state (Ignored): Not used, present here for API consistency by convention.
            exog (Ignored): Not used, present here for API consistency by convention.
            n_boot (Ignored): Not used, present here for API consistency by convention.

        Returns:
            pd.DataFrame: Values predicted by the forecaster and their estimated interval.
                - pred: predictions.
                - lower_bound: lower bound of the interval.
                - upper_bound: upper bound of the interval.

        Raises:
            ValueError: If `method` is not 'conformal'.
            ValueError: If `interval` is not a float or a list/tuple defining a symmetric interval when using `method='conformal'`.
            ValueError: If all equivalent values are missing when using a pandas DateOffset as offset.
            TypeError: If `self.offset` is neither an int nor a pandas DateOffset.

        References:
            .. [1] MAPIE - Model Agnostic Prediction Interval Estimator.
                https://mapie.readthedocs.io/en/stable/theoretical_description_regression.html#the-split-method

        Examples:
            >>> import pandas as pd
            >>> import numpy as np
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterEquivalentDate
            >>> data = pd.Series(
            ...     data = np.arange(14, dtype=float),
            ...     index = pd.date_range(start='2022-01-01', periods=14, freq='D')
            ... )
            >>> forecaster = ForecasterEquivalentDate(offset=7)
            >>> forecaster.fit(y=data, store_in_sample_residuals=True)
            >>> forecaster.predict_interval(steps=3, interval=0.8)
                        pred  lower_bound  upper_bound
            2022-01-15   7.0          6.0          8.0
            2022-01-16   8.0          7.0          9.0
            2022-01-17   9.0          8.0         10.0
        """

        if method != "conformal":
            raise ValueError(
                f"Method '{method}' is not supported. Only 'conformal' is available."
            )

        if last_window is None:
            last_window = self.last_window_

        check_predict_input(
            forecaster_name=type(self).__name__,
            steps=steps,
            is_fitted=self.is_fitted,
            exog_in_=False,
            index_type_=self.index_type_,
            index_freq_=self.index_freq_,
            window_size=self.window_size,
            last_window=last_window,
        )

        check_residuals_input(
            forecaster_name=type(self).__name__,
            use_in_sample_residuals=use_in_sample_residuals,
            in_sample_residuals_=self.in_sample_residuals_,
            out_sample_residuals_=self.out_sample_residuals_,
            use_binned_residuals=use_binned_residuals,
            in_sample_residuals_by_bin_=self.in_sample_residuals_by_bin_,
            out_sample_residuals_by_bin_=self.out_sample_residuals_by_bin_,
        )

        # Normalize `interval` to a nominal coverage in [0, 1].
        if isinstance(interval, (list, tuple)):
            check_interval(interval=interval, ensure_symmetric_intervals=True)
            nominal_coverage = (interval[1] - interval[0]) / 100
        else:
            check_interval(alpha=interval, alpha_literal="interval")
            nominal_coverage = interval

        if use_in_sample_residuals:
            residuals = self.in_sample_residuals_
            residuals_by_bin = self.in_sample_residuals_by_bin_
        else:
            residuals = self.out_sample_residuals_
            residuals_by_bin = self.out_sample_residuals_by_bin_

        prediction_index = expand_index(index=last_window.index, steps=steps)

        # Initialize to prevent use-before-initialization warnings
        predictions = None

        if isinstance(self.offset, int):

            last_window_values = last_window.to_numpy(copy=True).ravel()
            # Repeat the last `offset` positions until `steps` indexes are covered.
            equivalent_indexes = np.tile(
                np.arange(-self.offset, 0), int(np.ceil(steps / self.offset))
            )
            equivalent_indexes = equivalent_indexes[:steps]

            if self.n_offsets == 1:
                equivalent_values = last_window_values[equivalent_indexes]
                predictions = equivalent_values.ravel()

            if self.n_offsets > 1:
                # Stack one row of indexes per offset and aggregate column-wise.
                equivalent_indexes = [
                    equivalent_indexes - n * self.offset
                    for n in np.arange(self.n_offsets)
                ]
                equivalent_indexes = np.vstack(equivalent_indexes)
                equivalent_values = last_window_values[equivalent_indexes]
                predictions = np.apply_along_axis(
                    self.agg_func, axis=0, arr=equivalent_values
                )

        if isinstance(self.offset, pd.tseries.offsets.DateOffset):

            last_window = last_window.copy()
            max_allowed_date = last_window.index[-1]

            # For every date in prediction_index, walk backwards by `offset`
            # until `n_offsets` dates inside the available window are collected.
            offset_dates = []
            for date in prediction_index:
                selected_offsets = []
                while len(selected_offsets) < self.n_offsets:
                    offset_date = date - self.offset
                    if offset_date <= max_allowed_date:
                        selected_offsets.append(offset_date)
                    date = offset_date
                offset_dates.append(selected_offsets)

            offset_dates = np.array(offset_dates)

            # Select the values of the time series corresponding to the each
            # offset date. If the offset date is not in the time series, the
            # value is set to NaN.
            equivalent_values = (
                last_window.reindex(offset_dates.ravel())
                .to_numpy()
                .reshape(-1, self.n_offsets)
            )
            equivalent_values = pd.DataFrame(
                data=equivalent_values,
                index=prediction_index,
                columns=[f"offset_{i}" for i in range(self.n_offsets)],
            )

            # Error if all values are missing
            if equivalent_values.isnull().all().all():
                raise ValueError(
                    f"All equivalent values are missing. This is caused by using "
                    f"an offset ({self.offset}) larger than the available data. "
                    f"Try to decrease the size of the offset ({self.offset}), "
                    f"the number of `n_offsets` ({self.n_offsets}) or increase the "
                    f"size of `last_window`. In backtesting, this error may be "
                    f"caused by using an `initial_train_size` too small."
                )

            # Warning if equivalent values are missing
            incomplete_offsets = equivalent_values.isnull().any(axis=1)
            incomplete_offsets = incomplete_offsets[incomplete_offsets].index
            if not incomplete_offsets.empty:
                warnings.warn(
                    f"Steps: {incomplete_offsets.strftime('%Y-%m-%d').to_list()} "
                    f"are calculated with less than {self.n_offsets} `n_offsets`. "
                    f"To avoid this, increase the `last_window` size or decrease "
                    f"the number of `n_offsets`. The current configuration requires "
                    f"a total offset of {self.offset * self.n_offsets}.",
                    MissingValuesWarning,
                )

            aggregate_values = equivalent_values.apply(self.agg_func, axis=1)
            # Keep as ndarray: the binner and interval arithmetic below expect it.
            predictions = aggregate_values.to_numpy()

        if predictions is None:
            # Neither branch matched: `self.offset` has an unsupported type.
            # Fail early with a clear message instead of crashing on
            # `None` arithmetic below (same exception type as before).
            raise TypeError(
                f"`offset` must be an int or a pandas DateOffset. "
                f"Got {type(self.offset)}."
            )

        # Conformal correction: a single symmetric factor per prediction,
        # either per-bin (binned residuals) or global.
        if use_binned_residuals:
            correction_factor_by_bin = {
                k: np.quantile(np.abs(v), nominal_coverage)
                for k, v in residuals_by_bin.items()
            }
            replace_func = np.vectorize(lambda x: correction_factor_by_bin[x])
            predictions_bin = self.binner.transform(predictions)
            correction_factor = replace_func(predictions_bin)
        else:
            correction_factor = np.quantile(np.abs(residuals), nominal_coverage)

        lower_bound = predictions - correction_factor
        upper_bound = predictions + correction_factor
        predictions = np.column_stack([predictions, lower_bound, upper_bound])

        predictions = pd.DataFrame(
            data=predictions,
            index=prediction_index,
            columns=["pred", "lower_bound", "upper_bound"],
        )

        return predictions

    def set_in_sample_residuals(
        self, y: pd.Series, random_state: int = 123, exog: Any = None
    ) -> None:
        """
        Compute and store in-sample residuals for an already fitted forecaster.

        Residuals are the differences between the observed training values and
        the forecaster's predictions on that same data. Calling this method
        updates the following internal attributes:

        + `in_sample_residuals_`: residuals stored in a numpy ndarray.
        + `binner_intervals_`: bin intervals derived from the quantiles of the
        predicted values.
        + `in_sample_residuals_by_bin_`: dictionary mapping each predicted-value
        interval to the residuals associated with that range.

        At most 10_000 residuals are kept in `in_sample_residuals_`; when more
        are available, a random sample of 10_000 is stored. Each bin keeps at
        most `10_000 // self.binner.n_bins_` residuals.

        Args:
            y (pandas Series): Training time series.
            random_state (int, optional): Seed for the random sampling so the
                stored residuals are reproducible. Defaults to 123.
            exog (Ignored): Not used, present here for API consistency by convention.

        Returns:
            None

        Raises:
            NotFittedError: If the forecaster has not been fitted.
            IndexError: If the index range of `y` does not match the training range.

        Examples:
            >>> import pandas as pd
            >>> import numpy as np
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterEquivalentDate
            >>> data = pd.Series(
            ...     data=np.arange(14, dtype=float),
            ...     index=pd.date_range(start="2022-01-01", periods=14, freq="D"),
            ... )
            >>> forecaster = ForecasterEquivalentDate(offset=7)
            >>> forecaster.fit(y=data)
            >>> # Recompute and store residuals if needed
            >>> forecaster.set_in_sample_residuals(y=data, random_state=123)
            >>> forecaster.in_sample_residuals_.shape
            (7,)

        """

        if not self.is_fitted:
            raise NotFittedError(
                "This forecaster is not fitted yet. Call `fit` with appropriate "
                "arguments before using `set_in_sample_residuals()`."
            )

        # Validate `y` and extract its index (values are not materialized).
        check_y(y=y)
        y_index = check_extract_values_and_index(
            data=y, data_label="`y`", return_values=False
        )[1]
        observed_range = y_index[[0, -1]]

        # The provided series must cover exactly the range seen during training.
        if not observed_range.equals(self.training_range_):
            raise IndexError(
                f"The index range of `y` does not match the range "
                f"used during training. Please ensure the index is aligned "
                f"with the training data.\n"
                f"    Expected : {self.training_range_}\n"
                f"    Received : {observed_range}"
            )

        self._binning_in_sample_residuals(
            y=y, store_in_sample_residuals=True, random_state=random_state
        )

    def set_out_sample_residuals(
        self,
        y_true: np.ndarray | pd.Series,
        y_pred: np.ndarray | pd.Series,
        append: bool = False,
        random_state: int = 123,
    ) -> None:
        """
        Set new values to the attribute `out_sample_residuals_`. Out of sample
        residuals are meant to be calculated using observations that did not
        participate in the training process. Two internal attributes are updated:

        + `out_sample_residuals_`: residuals stored in a numpy ndarray.
        + `out_sample_residuals_by_bin_`: residuals are binned according to the
        predicted value they are associated with and stored in a dictionary, where
        the keys are the intervals of the predicted values and the values are
        the residuals associated with that range. If a bin is empty, it
        is filled with a random sample of residuals from other bins. This is done
        to ensure that all bins have at least one residual and can be used in the
        prediction process.

        A total of 10_000 residuals are stored in the attribute `out_sample_residuals_`.
        If the number of residuals is greater than 10_000, a random sample of
        10_000 residuals is stored. The number of residuals stored per bin is
        limited to `10_000 // self.binner.n_bins_`.

        Args:
            y_true (numpy ndarray, pandas Series): True values of the time series
                from which the residuals have been calculated.
            y_pred (numpy ndarray, pandas Series): Predicted values of the time series.
            append (bool, optional): If `True`, new residuals are added to the ones
                already stored in the forecaster. If after appending the new
                residuals, the limit of `10_000 // self.binner.n_bins_` values per
                bin is reached, a random sample of residuals is stored. Defaults
                to False.
            random_state (int, optional): Sets a seed to the random sampling for
                reproducible output. Defaults to 123.

        Returns:
            None

        Raises:
            NotFittedError: If the forecaster has not been fitted.
            TypeError: If `y_true` or `y_pred` are not numpy arrays or pandas Series.
            ValueError: If `y_true` and `y_pred` have different lengths.
            ValueError: If `y_true` and `y_pred` are pandas Series with different indexes.

        Examples:
            >>> import pandas as pd
            >>> import numpy as np
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterEquivalentDate
            >>> data = pd.Series(
            ...     data=np.arange(21, dtype=float),
            ...     index=pd.date_range(start="2022-01-01", periods=21, freq="D"),
            ... )
            >>> forecaster = ForecasterEquivalentDate(offset=7)
            >>> forecaster.fit(y=data)
            >>> preds = forecaster.predict(steps=7)
            >>> y_true = pd.Series(data[-7:].to_numpy(), index=preds.index)
            >>> forecaster.set_out_sample_residuals(y_true=y_true, y_pred=preds)
            >>> forecaster.out_sample_residuals_.shape
            (7,)

        """

        if not self.is_fitted:
            raise NotFittedError(
                "This forecaster is not fitted yet. Call `fit` with appropriate "
                "arguments before using `set_out_sample_residuals()`."
            )

        # --- Input validation ------------------------------------------------
        if not isinstance(y_true, (np.ndarray, pd.Series)):
            raise TypeError(
                f"`y_true` argument must be `numpy ndarray` or `pandas Series`. "
                f"Got {type(y_true)}."
            )

        if not isinstance(y_pred, (np.ndarray, pd.Series)):
            raise TypeError(
                f"`y_pred` argument must be `numpy ndarray` or `pandas Series`. "
                f"Got {type(y_pred)}."
            )

        if len(y_true) != len(y_pred):
            raise ValueError(
                f"`y_true` and `y_pred` must have the same length. "
                f"Got {len(y_true)} and {len(y_pred)}."
            )

        if isinstance(y_true, pd.Series) and isinstance(y_pred, pd.Series):
            if not y_true.index.equals(y_pred.index):
                raise ValueError("`y_true` and `y_pred` must have the same index.")

        if not isinstance(y_pred, np.ndarray):
            y_pred = y_pred.to_numpy()
        if not isinstance(y_true, np.ndarray):
            y_true = y_true.to_numpy()

        # Drop any pair where prediction or residual is NaN before binning.
        data = pd.DataFrame(
            {"prediction": y_pred, "residuals": y_true - y_pred}
        ).dropna()
        y_pred = data["prediction"].to_numpy()
        residuals = data["residuals"].to_numpy()

        # Assign each residual to the bin of its associated predicted value.
        data["bin"] = self.binner.transform(y_pred).astype(int)
        residuals_by_bin = data.groupby("bin")["residuals"].apply(np.array).to_dict()

        # Start from the previously stored residuals (empty when none exist).
        out_sample_residuals = (
            np.array([])
            if self.out_sample_residuals_ is None
            else self.out_sample_residuals_
        )
        out_sample_residuals_by_bin = (
            {}
            if self.out_sample_residuals_by_bin_ is None
            else self.out_sample_residuals_by_bin_
        )
        if append:
            # Merge the new residuals with the stored ones, bin by bin.
            out_sample_residuals = np.concatenate([out_sample_residuals, residuals])
            for k, v in residuals_by_bin.items():
                if k in out_sample_residuals_by_bin:
                    out_sample_residuals_by_bin[k] = np.concatenate(
                        (out_sample_residuals_by_bin[k], v)
                    )
                else:
                    out_sample_residuals_by_bin[k] = v
        else:
            # Replace the stored residuals entirely.
            out_sample_residuals = residuals
            out_sample_residuals_by_bin = residuals_by_bin

        # Cap the number of residuals stored per bin.
        max_samples = 10_000 // self.binner.n_bins_
        rng = np.random.default_rng(seed=random_state)
        for k, v in out_sample_residuals_by_bin.items():
            if len(v) > max_samples:
                sample = rng.choice(a=v, size=max_samples, replace=False)
                out_sample_residuals_by_bin[k] = sample

        # Ensure every interval known to the binner has an entry, even if empty.
        bin_keys = (
            [] if self.binner_intervals_ is None else self.binner_intervals_.keys()
        )
        for k in bin_keys:
            if k not in out_sample_residuals_by_bin:
                out_sample_residuals_by_bin[k] = np.array([])

        # Fill empty bins with a random sample drawn from all residuals so that
        # every bin can be used during prediction.
        empty_bins = [k for k, v in out_sample_residuals_by_bin.items() if v.size == 0]
        if empty_bins:
            warnings.warn(
                f"The following bins have no out of sample residuals: {empty_bins}. "
                f"No predicted values fall in the interval "
                f"{[self.binner_intervals_[bin] for bin in empty_bins]}. "
                f"Empty bins will be filled with a random sample of residuals.",
                ResidualsUsageWarning,
            )
            empty_bin_size = min(max_samples, len(out_sample_residuals))
            for k in empty_bins:
                out_sample_residuals_by_bin[k] = rng.choice(
                    a=out_sample_residuals, size=empty_bin_size, replace=False
                )

        # Cap the overall number of stored residuals at 10_000.
        if len(out_sample_residuals) > 10_000:
            out_sample_residuals = rng.choice(
                a=out_sample_residuals, size=10_000, replace=False
            )

        self.out_sample_residuals_ = out_sample_residuals
        self.out_sample_residuals_by_bin_ = out_sample_residuals_by_bin

    def get_tags(self) -> dict[str, Any]:
        """
        Retrieve the tag dictionary that characterizes this forecaster's behavior.

        Returns:
            dict: Dictionary with forecaster tags.

        Examples:
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterEquivalentDate
            >>> forecaster = ForecasterEquivalentDate(offset=7)
            >>> tags = forecaster.get_tags()
            >>> sorted(tags.keys())[:3]
            ['allowed_input_types_exog', 'allowed_input_types_series', 'forecaster_name']
        """

        # The tags live on the class as `__spotforecast_tags__`; expose them
        # directly (no copy) so the behavior matches attribute access.
        tags = self.__spotforecast_tags__
        return tags

    def summary(self) -> None:
        """
        Print a textual description of the forecaster to standard output.

        The output is the forecaster's string representation.

        Returns:
            None

        Examples:
            >>> import pandas as pd
            >>> import numpy as np
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterEquivalentDate
            >>> data = pd.Series(
            ...     data=np.arange(14, dtype=float),
            ...     index=pd.date_range(start="2022-01-01", periods=14, freq="D"),
            ... )
            >>> forecaster = ForecasterEquivalentDate(offset=7)
            >>> forecaster.fit(y=data)
            >>> forecaster.summary()  # doctest: +ELLIPSIS
            ============================
        """

        print(self)

__repr__()

Information displayed when a Forecaster object is printed.

Returns:

str: Information about the forecaster. It contains the following information:

  • Offset: Value of the offset argument.
  • Number of offsets: Value of the n_offsets argument.
  • Aggregation function: Name of the agg_func function.
  • Window size: Value of the window_size attribute.
  • Series name: Name of the series provided by the user during training.
  • Training range: First and last values of index of the data used during training.
  • Training index type: Type of index of the data used during training.
  • Training index frequency: Frequency of index of the data used during training.
  • Creation date: Date of creation of the forecaster object.
  • Last fit date: Date of last fit of the forecaster object.
  • spotforecast version: Version of spotforecast library used to create the forecaster.
  • Python version: Version of python used to create the forecaster.
  • Forecaster id: Name used as an identifier of the forecaster.

Examples:

>>> import pandas as pd
>>> import numpy as np
>>> from spotforecast2_safe.forecaster.recursive import ForecasterEquivalentDate
>>> data = pd.Series(
...     data = np.arange(14),
...     index = pd.date_range(start='2022-01-01', periods=14, freq='D')
... )
>>> forecaster = ForecasterEquivalentDate(offset=7)
>>> forecaster.fit(y=data)
>>> print(forecaster)
============================
ForecasterEquivalentDate
============================
Offset: 7
Number of offsets: 1
Aggregation function: mean
Window size: 7
Series name: y
Training range: [Timestamp('2022-01-01 00:00:00'), Timestamp('2022-01-14 00:00:00')]
Training index type: DatetimeIndex
Training index frequency: D
Creation date: 2023-11-19 12:00:00
Last fit date: 2023-11-19 12:00:00
spotforecast version: 1.0.0
Python version: 3.8.10
Forecaster id: None
Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_equivalent_date.py
def __repr__(self) -> str:
    """
    Information displayed when a Forecaster object is printed.

    Returns:
        str: Information about the forecaster. It contains the following information:
        - Offset: Value of the `offset` argument.
        - Number of offsets: Value of the `n_offsets` argument.
        - Aggregation function: Name of the `agg_func` function.
        - Window size: Value of the `window_size` attribute.
        - Series name: Name of the series provided by the user during training.
        - Training range: First and last values of index of the data used during training.
        - Training index type: Type of index of the data used during training.
        - Training index frequency: Frequency of index of the data used during training.
        - Creation date: Date of creation of the forecaster object.
        - Last fit date: Date of last fit of the forecaster object.
        - spotforecast version: Version of spotforecast library used to create the forecaster.
        - Python version: Version of python used to create the forecaster.
        - Forecaster id: Name used as an identifier of the forecaster.

    Examples:
        >>> import pandas as pd
        >>> import numpy as np
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterEquivalentDate
        >>> data = pd.Series(
        ...     data = np.arange(14),
        ...     index = pd.date_range(start='2022-01-01', periods=14, freq='D')
        ... )
        >>> forecaster = ForecasterEquivalentDate(offset=7)
        >>> forecaster.fit(y=data)
        >>> print(forecaster)
        ============================
        ForecasterEquivalentDate
        ============================
        Offset: 7
        Number of offsets: 1
        Aggregation function: mean
        Window size: 7
        Series name: y
        Training range: [Timestamp('2022-01-01 00:00:00'), Timestamp('2022-01-14 00:00:00')]
        Training index type: DatetimeIndex
        Training index frequency: D
        Creation date: 2023-11-19 12:00:00
        Last fit date: 2023-11-19 12:00:00
        spotforecast version: 1.0.0
        Python version: 3.8.10
        Forecaster id: None

    """

    # Each line ends with a trailing space before the newline; the
    # training-related fields fall back to None while the forecaster is
    # not fitted, since those attributes are only set by `fit`.
    info = (
        f"{'=' * len(type(self).__name__)} \n"
        f"{type(self).__name__} \n"
        f"{'=' * len(type(self).__name__)} \n"
        f"Offset: {self.offset} \n"
        f"Number of offsets: {self.n_offsets} \n"
        f"Aggregation function: {self.agg_func.__name__} \n"
        f"Window size: {self.window_size} \n"
        f"Series name: {self.series_name_in_} \n"
        f"Training range: {self.training_range_.to_list() if self.is_fitted else None} \n"
        f"Training index type: {str(self.index_type_).split('.')[-1][:-2] if self.is_fitted else None} \n"
        f"Training index frequency: {self.index_freq_ if self.is_fitted else None} \n"
        f"Creation date: {self.creation_date} \n"
        f"Last fit date: {self.fit_date} \n"
        f"spotforecast version: {self.spotforecast_version} \n"
        f"Python version: {self.python_version} \n"
        f"Forecaster id: {self.forecaster_id} \n"
    )

    return info

fit(y, store_in_sample_residuals=False, random_state=123, exog=None)

Training Forecaster.

Parameters:

y (pandas Series, required):
    Training time series.

store_in_sample_residuals (bool, default False):
    If True, in-sample residuals will be stored in the forecaster object after fitting (in_sample_residuals_ and in_sample_residuals_by_bin_ attributes). If False, only the intervals of the bins are stored.

random_state (int, default 123):
    Set a seed for the random generator so that the stored sample residuals are always deterministic.

exog (Ignored, default None):
    Not used, present here for API consistency by convention.

Returns:

None

Examples:

>>> import pandas as pd
>>> import numpy as np
>>> from spotforecast2_safe.forecaster.recursive import ForecasterEquivalentDate
>>> data = pd.Series(
...     data = np.arange(14),
...     index = pd.date_range(start='2022-01-01', periods=14, freq='D')
... )
>>> forecaster = ForecasterEquivalentDate(offset=7)
>>> forecaster.fit(y=data)
Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_equivalent_date.py
def fit(
    self,
    y: pd.Series,
    store_in_sample_residuals: bool = False,
    random_state: int = 123,
    exog: Any = None,
) -> None:
    """
    Training Forecaster.

    Validates `y`, derives the effective window size from `offset` and
    `n_offsets`, stores fitting metadata (training range, index type and
    frequency) and keeps a full copy of `y` as the last window so that
    equivalent dates are available when calling `predict`.

    Args:
        y (pandas Series): Training time series.
        store_in_sample_residuals (bool, optional): If `True`, in-sample
            residuals will be stored in the forecaster object after fitting
            (`in_sample_residuals_` and `in_sample_residuals_by_bin_` attributes).
            If `False`, only the intervals of the bins are stored. Defaults to False.
        random_state (int, optional): Set a seed for the random generator so
            that the stored sample residuals are always deterministic. Defaults to 123.
        exog (Ignored): Not used, present here for API consistency by convention.

    Returns:
        None

    Raises:
        TypeError: If `y` is not a pandas Series, or if `offset` is a pandas
            DateOffset and the index of `y` is not a pandas DatetimeIndex
            with a given (or inferable) frequency.
        ValueError: If `y` is too short for the window required by `offset`
            and `n_offsets`.

    Examples:
        >>> import pandas as pd
        >>> import numpy as np
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterEquivalentDate
        >>> data = pd.Series(
        ...     data = np.arange(14),
        ...     index = pd.date_range(start='2022-01-01', periods=14, freq='D')
        ... )
        >>> forecaster = ForecasterEquivalentDate(offset=7)
        >>> forecaster.fit(y=data)
    """

    if not isinstance(y, pd.Series):
        raise TypeError(
            f"`y` must be a pandas Series with a DatetimeIndex or a RangeIndex. "
            f"Found {type(y)}."
        )

    # A calendar-aware offset only makes sense on a DatetimeIndex with a
    # known frequency; try to infer the frequency when it is missing.
    if isinstance(self.offset, pd.tseries.offsets.DateOffset):
        if not isinstance(y.index, pd.DatetimeIndex):
            raise TypeError(
                "If `offset` is a pandas DateOffset, the index of `y` must be a "
                "pandas DatetimeIndex with frequency."
            )
        elif y.index.freq is None:
            try:
                # NOTE(review): this assigns the inferred frequency in place,
                # mutating the caller's index object — confirm this side
                # effect on the user's Series is intended.
                y.index.freq = pd.infer_freq(y.index)
            except (ValueError, TypeError):
                raise TypeError(
                    "If `offset` is a pandas DateOffset, the index of `y` must be a "
                    "pandas DatetimeIndex with frequency."
                )
            # `infer_freq` can return None without raising; treat that the
            # same as an inference failure.
            if y.index.freq is None:
                raise TypeError(
                    "If `offset` is a pandas DateOffset, the index of `y` must be a "
                    "pandas DatetimeIndex with frequency."
                )

    # Reset values in case the forecaster has already been fitted.
    self.last_window_ = None
    self.index_type_ = None
    self.index_freq_ = None
    self.training_range_ = None
    self.series_name_in_ = None
    self.is_fitted = False

    _, y_index = check_extract_values_and_index(
        data=y, data_label="`y`", return_values=False
    )

    if isinstance(self.offset, pd.tseries.offsets.DateOffset):
        # Calculate the window_size in steps for compatibility with the
        # check_predict_input function. This is not an exact calculation
        # because the offset follows the calendar rules and the distance
        # between two dates may not be constant.
        first_valid_index = y_index[-1] - self.offset * self.n_offsets

        try:
            # KeyError here means the full offset window reaches before the
            # start of `y`, i.e. the data is too short.
            window_size_idx_start = y_index.get_loc(first_valid_index)
            window_size_idx_end = y_index.get_loc(y_index[-1])
            self.window_size = window_size_idx_end - window_size_idx_start
        except KeyError:
            raise ValueError(
                f"The length of `y` ({len(y)}), must be greater than or equal "
                f"to the window size ({self.window_size}). This is because  "
                f"the offset ({self.offset}) is larger than the available "
                f"data. Try to decrease the size of the offset ({self.offset}), "
                f"the number of `n_offsets` ({self.n_offsets}) or increase the "
                f"size of `y`."
            )
    else:
        # Integer offset: `window_size` was fixed at construction time; only
        # validate that enough history is available.
        if len(y) <= self.window_size:
            raise ValueError(
                f"Length of `y` must be greater than the maximum window size "
                f"needed by the forecaster. This is because  "
                f"the offset ({self.offset}) is larger than the available "
                f"data. Try to decrease the size of the offset ({self.offset}), "
                f"the number of `n_offsets` ({self.n_offsets}) or increase the "
                f"size of `y`.\n"
                f"    Length `y`: {len(y)}.\n"
                f"    Max window size: {self.window_size}.\n"
            )

    # Store fitting metadata used later by predict/check_predict_input.
    self.is_fitted = True
    self.series_name_in_ = y.name if y.name is not None else "y"
    self.fit_date = pd.Timestamp.today().strftime("%Y-%m-%d %H:%M:%S")
    self.training_range_ = y_index[[0, -1]]
    self.index_type_ = type(y_index)
    self.index_freq_ = (
        y_index.freqstr if isinstance(y_index, pd.DatetimeIndex) else y_index.step
    )

    # NOTE: This is done to save time during fit in functions such as backtesting()
    if self._probabilistic_mode is not False:
        self._binning_in_sample_residuals(
            y=y,
            store_in_sample_residuals=store_in_sample_residuals,
            random_state=random_state,
        )

    # The last time window of training data is stored so that equivalent
    # dates are available when calling the `predict` method.
    # Store the whole series to avoid errors when the offset is larger
    # than the data available.
    self.last_window_ = y.copy()

get_tags()

Return the tags that characterize the behavior of the forecaster.

Returns:

Name Type Description
dict dict[str, Any]

Dictionary with forecaster tags.

Examples:

>>> from spotforecast2_safe.forecaster.recursive import ForecasterEquivalentDate
>>> forecaster = ForecasterEquivalentDate(offset=7)
>>> tags = forecaster.get_tags()
>>> sorted(tags.keys())[:3]
['allowed_input_types_exog', 'allowed_input_types_series', 'forecaster_name']
Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_equivalent_date.py
def get_tags(self) -> dict[str, Any]:
    """
    Return the tag dictionary that characterizes this forecaster's behavior.

    Returns:
        dict: Dictionary with forecaster tags.

    Examples:
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterEquivalentDate
        >>> forecaster = ForecasterEquivalentDate(offset=7)
        >>> tags = forecaster.get_tags()
        >>> sorted(tags.keys())[:3]
        ['allowed_input_types_exog', 'allowed_input_types_series', 'forecaster_name']
    """
    # Tags are declared at class level; simply expose them to the caller.
    tags = self.__spotforecast_tags__
    return tags

predict(steps, last_window=None, check_inputs=True, exog=None)

Predict n steps ahead.

Parameters:

Name Type Description Default
steps int

Number of steps to predict.

required
last_window pandas Series

Past values needed to select the last equivalent dates according to the offset. If last_window = None, the values stored in self.last_window_ are used and the predictions start immediately after the training data. Defaults to None.

None
check_inputs bool

If True, the input is checked for possible warnings and errors with the check_predict_input function. This argument is created for internal use and is not recommended to be changed. Defaults to True.

True
exog Ignored

Not used, present here for API consistency by convention.

None

Returns:

Type Description
Series

pd.Series: Predicted values.

Raises:

Type Description
ValueError

If all equivalent values are missing when using a pandas DateOffset as offset. This can be caused by using an offset larger than the available data. To avoid this, try to decrease the size of the offset, the number of n_offsets or increase the size of last_window. In backtesting, this error may be caused by using an initial_train_size too small.

Warning

If some equivalent values are missing when using a pandas DateOffset as offset. This can be caused by using an offset larger than the available data or by using an initial_train_size too small in backtesting. To avoid this, increase the last_window size or decrease the number of n_offsets.

Examples:

>>> import pandas as pd
>>> import numpy as np
>>> from spotforecast2_safe.forecaster.recursive import ForecasterEquivalentDate
>>> data = pd.Series(
...     data = np.arange(14),
...     index = pd.date_range(start='2022-01-01', periods=14, freq='D')
... )
>>> forecaster = ForecasterEquivalentDate(offset=7)
>>> forecaster.fit(y=data)
>>> forecaster.predict(steps=3)
2022-01-15    7
2022-01-16    8
2022-01-17    9
Freq: D, Name: pred, dtype: int64
Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_equivalent_date.py
def predict(
    self,
    steps: int,
    last_window: pd.Series | None = None,
    check_inputs: bool = True,
    exog: Any = None,
) -> pd.Series:
    """
    Predict n steps ahead.

    For an integer `offset`, values are taken at fixed positions counted
    back from the end of the window. For a pandas DateOffset, equivalent
    calendar dates are looked up in `last_window` (missing dates become
    NaN) and aggregated with `agg_func`.

    Args:
        steps (int): Number of steps to predict.
        last_window (pandas Series, optional): Past values needed to select the
            last equivalent dates according to the offset. If `last_window = None`,
            the values stored in `self.last_window_` are used and the predictions
            start immediately after the training data. Defaults to None.
        check_inputs (bool, optional): If `True`, the input is checked for
            possible warnings and errors with the `check_predict_input` function.
            This argument is created for internal use and is not recommended to
            be changed. Defaults to True.
        exog (Ignored): Not used, present here for API consistency by convention.

    Returns:
        pd.Series: Predicted values.

    Raises:
        ValueError:
            If all equivalent values are missing when using a pandas DateOffset as offset.
            This can be caused by using an offset larger than the available data.
            To avoid this, try to decrease the size of the offset, the number of `n_offsets` or increase the size of `last_window`.
            In backtesting, this error may be caused by using an `initial_train_size` too small.
        Warning:
            If some equivalent values are missing when using a pandas DateOffset as offset.
            This can be caused by using an offset larger than the available data or by using an `initial_train_size` too small in backtesting.
            To avoid this, increase the `last_window` size or decrease the number of `n_offsets`.

    Examples:
        >>> import pandas as pd
        >>> import numpy as np
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterEquivalentDate
        >>> data = pd.Series(
        ...     data = np.arange(14),
        ...     index = pd.date_range(start='2022-01-01', periods=14, freq='D')
        ... )
        >>> forecaster = ForecasterEquivalentDate(offset=7)
        >>> forecaster.fit(y=data)
        >>> forecaster.predict(steps=3)
        2022-01-15    7
        2022-01-16    8
        2022-01-17    9
        Freq: D, Name: pred, dtype: int64
    """

    if last_window is None:
        last_window = self.last_window_

    if check_inputs:
        check_predict_input(
            forecaster_name=type(self).__name__,
            steps=steps,
            is_fitted=self.is_fitted,
            exog_in_=False,
            index_type_=self.index_type_,
            index_freq_=self.index_freq_,
            window_size=self.window_size,
            last_window=last_window,
        )

    prediction_index = expand_index(index=last_window.index, steps=steps)

    # Initialize to prevent use-before-initialization warnings
    # NOTE(review): if `offset` were neither int nor DateOffset this method
    # would return None — presumably prevented by `__init__` validation;
    # confirm.
    predictions = None

    if isinstance(self.offset, int):

        # Negative positions -offset..-1 repeated (tiled) to cover `steps`,
        # so predictions cycle through the last `offset` observations.
        last_window_values = last_window.to_numpy(copy=True).ravel()
        equivalent_indexes = np.tile(
            np.arange(-self.offset, 0), int(np.ceil(steps / self.offset))
        )
        equivalent_indexes = equivalent_indexes[:steps]

        if self.n_offsets == 1:
            equivalent_values = last_window_values[equivalent_indexes]
            predictions = equivalent_values.ravel()

        if self.n_offsets > 1:
            # Stack one row of positions per offset period and aggregate
            # column-wise with `agg_func`.
            equivalent_indexes = [
                equivalent_indexes - n * self.offset
                for n in np.arange(self.n_offsets)
            ]
            equivalent_indexes = np.vstack(equivalent_indexes)
            equivalent_values = last_window_values[equivalent_indexes]
            predictions = np.apply_along_axis(
                self.agg_func, axis=0, arr=equivalent_values
            )

        predictions = pd.Series(
            data=predictions, index=prediction_index, name="pred"
        )

    if isinstance(self.offset, pd.tseries.offsets.DateOffset):

        last_window = last_window.copy()
        max_allowed_date = last_window.index[-1]

        # For every date in prediction_index, calculate the n offsets.
        # Dates beyond `max_allowed_date` are skipped, stepping further back
        # until `n_offsets` usable dates are collected.
        offset_dates = []
        for date in prediction_index:
            selected_offsets = []
            while len(selected_offsets) < self.n_offsets:
                offset_date = date - self.offset
                if offset_date <= max_allowed_date:
                    selected_offsets.append(offset_date)
                date = offset_date
            offset_dates.append(selected_offsets)

        offset_dates = np.array(offset_dates)

        # Select the values of the time series corresponding to each
        # offset date. If the offset date is not in the time series, the
        # value is set to NaN.
        equivalent_values = (
            last_window.reindex(offset_dates.ravel())
            .to_numpy()
            .reshape(-1, self.n_offsets)
        )
        equivalent_values = pd.DataFrame(
            data=equivalent_values,
            index=prediction_index,
            columns=[f"offset_{i}" for i in range(self.n_offsets)],
        )

        # Error if all values are missing
        if equivalent_values.isnull().all().all():
            raise ValueError(
                f"All equivalent values are missing. This is caused by using "
                f"an offset ({self.offset}) larger than the available data. "
                f"Try to decrease the size of the offset ({self.offset}), "
                f"the number of `n_offsets` ({self.n_offsets}) or increase the "
                f"size of `last_window`. In backtesting, this error may be "
                f"caused by using an `initial_train_size` too small."
            )

        # Warning if equivalent values are missing
        incomplete_offsets = equivalent_values.isnull().any(axis=1)
        incomplete_offsets = incomplete_offsets[incomplete_offsets].index
        if not incomplete_offsets.empty:
            warnings.warn(
                f"Steps: {incomplete_offsets.strftime('%Y-%m-%d').to_list()} "
                f"are calculated with less than {self.n_offsets} `n_offsets`. "
                f"To avoid this, increase the `last_window` size or decrease "
                f"the number of `n_offsets`. The current configuration requires "
                f"a total offset of {self.offset * self.n_offsets}.",
                MissingValuesWarning,
            )

        # Row-wise aggregation over the collected equivalent values.
        aggregate_values = equivalent_values.apply(self.agg_func, axis=1)
        predictions = aggregate_values.rename("pred")

    return predictions

predict_interval(steps, last_window=None, method='conformal', interval=[5, 95], use_in_sample_residuals=True, use_binned_residuals=True, random_state=None, exog=None, n_boot=None)

Predict n steps ahead and estimate prediction intervals using the conformal prediction method. Refer to the References section for additional details on this method.

Parameters:

Name Type Description Default
steps int

Number of steps to predict.

required
last_window pandas Series

Past values needed to select the last equivalent dates according to the offset. If last_window = None, the values stored in self.last_window_ are used and the predictions start immediately after the training data. Defaults to None.

None
method str

Technique used to estimate prediction intervals. Available options: - 'conformal': Employs the conformal prediction split method for interval estimation [1]_. Defaults to 'conformal'.

'conformal'
interval (float, list, tuple)

Confidence level of the prediction interval. Interpretation depends on the method used: - If float, represents the nominal (expected) coverage (between 0 and 1). For instance, interval=0.95 corresponds to [2.5, 97.5] percentiles. - If list or tuple, defines the exact percentiles to compute, which must be between 0 and 100 inclusive. For example, interval of 95% should be as interval = [2.5, 97.5]. - When using method='conformal', the interval must be a float or a list/tuple defining a symmetric interval. Defaults to [5, 95].

[5, 95]
use_in_sample_residuals bool

If True, residuals from the training data are used as proxy of prediction error to create predictions. If False, out of sample residuals (calibration) are used. Out-of-sample residuals must be precomputed using Forecaster's set_out_sample_residuals() method. Defaults to True.

True
use_binned_residuals bool

If True, residuals are selected based on the predicted values (binned selection). If False, residuals are selected randomly. Defaults to True.

True
random_state Ignored

Not used, present here for API consistency by convention.

None
exog Ignored

Not used, present here for API consistency by convention.

None
n_boot Ignored

Not used, present here for API consistency by convention.

None

Returns:

Type Description
DataFrame

pd.DataFrame: Values predicted by the forecaster and their estimated interval. - pred: predictions. - lower_bound: lower bound of the interval. - upper_bound: upper bound of the interval.

Raises:

Type Description
ValueError

If method is not 'conformal'.

ValueError

If interval is not a float or a list/tuple defining a symmetric interval when using method='conformal'.

References

.. [1] MAPIE - Model Agnostic Prediction Interval Estimator. https://mapie.readthedocs.io/en/stable/theoretical_description_regression.html#the-split-method

Examples:

>>> import pandas as pd
>>> import numpy as np
>>> from spotforecast2_safe.forecaster.recursive import ForecasterEquivalentDate
>>> data = pd.Series(
...     data = np.arange(14, dtype=float),
...     index = pd.date_range(start='2022-01-01', periods=14, freq='D')
... )
>>> forecaster = ForecasterEquivalentDate(offset=7)
>>> forecaster.fit(y=data, store_in_sample_residuals=True)
>>> forecaster.predict_interval(steps=3, interval=0.8)
            pred  lower_bound  upper_bound
2022-01-15   7.0          6.0          8.0
2022-01-16   8.0          7.0          9.0
2022-01-17   9.0          8.0         10.0
Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_equivalent_date.py
def predict_interval(
    self,
    steps: int,
    last_window: pd.Series | None = None,
    method: str = "conformal",
    # Immutable tuple default instead of a mutable list (ruff B006); the
    # `isinstance(interval, (list, tuple))` check below treats both the same.
    interval: float | list[float] | tuple[float, ...] = (5, 95),
    use_in_sample_residuals: bool = True,
    use_binned_residuals: bool = True,
    random_state: Any = None,
    exog: Any = None,
    n_boot: Any = None,
) -> pd.DataFrame:
    """
    Predict n steps ahead and estimate prediction intervals using conformal
    prediction method. Refer to the References section for additional
    details on this method.

    Args:
        steps (int): Number of steps to predict.
        last_window (pandas Series, optional): Past values needed to select the
            last equivalent dates according to the offset. If `last_window = None`,
            the values stored in `self.last_window_` are used and the predictions
            start immediately after the training data. Defaults to None.
        method (str, optional): Technique used to estimate prediction intervals.
            Available options:
            - 'conformal': Employs the conformal prediction split method for
            interval estimation [1]_. Defaults to 'conformal'.
        interval (float, list, tuple, optional): Confidence level of the
            prediction interval. Interpretation depends on the method used:
            - If `float`, represents the nominal (expected) coverage (between 0
            and 1). For instance, `interval=0.95` corresponds to `[2.5, 97.5]`
            percentiles.
            - If `list` or `tuple`, defines the exact percentiles to compute,
            which must be between 0 and 100 inclusive. For example, interval
            of 95% should be as `interval = [2.5, 97.5]`.
            - When using `method='conformal'`, the interval must be a float or
            a list/tuple defining a symmetric interval. Defaults to (5, 95).
        use_in_sample_residuals (bool, optional): If `True`, residuals from the
            training data are used as proxy of prediction error to create predictions.
            If `False`, out of sample residuals (calibration) are used.
            Out-of-sample residuals must be precomputed using Forecaster's
            `set_out_sample_residuals()` method. Defaults to True.
        use_binned_residuals (bool, optional): If `True`, residuals are selected
            based on the predicted values (binned selection).
            If `False`, residuals are selected randomly. Defaults to True.
        random_state (Ignored): Not used, present here for API consistency by convention.
        exog (Ignored): Not used, present here for API consistency by convention.
        n_boot (Ignored): Not used, present here for API consistency by convention.

    Returns:
        pd.DataFrame: Values predicted by the forecaster and their estimated interval.
            - pred: predictions.
            - lower_bound: lower bound of the interval.
            - upper_bound: upper bound of the interval.

    Raises:
        ValueError: If `method` is not 'conformal'.
        ValueError: If `interval` is not a float or a list/tuple defining a symmetric interval when using `method='conformal'`.

    References:
        .. [1] MAPIE - Model Agnostic Prediction Interval Estimator.
            https://mapie.readthedocs.io/en/stable/theoretical_description_regression.html#the-split-method

    Examples:
        >>> import pandas as pd
        >>> import numpy as np
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterEquivalentDate
        >>> data = pd.Series(
        ...     data = np.arange(14, dtype=float),
        ...     index = pd.date_range(start='2022-01-01', periods=14, freq='D')
        ... )
        >>> forecaster = ForecasterEquivalentDate(offset=7)
        >>> forecaster.fit(y=data, store_in_sample_residuals=True)
        >>> forecaster.predict_interval(steps=3, interval=0.8)
                    pred  lower_bound  upper_bound
        2022-01-15   7.0          6.0          8.0
        2022-01-16   8.0          7.0          9.0
        2022-01-17   9.0          8.0         10.0
    """

    if method != "conformal":
        raise ValueError(
            f"Method '{method}' is not supported. Only 'conformal' is available."
        )

    if last_window is None:
        last_window = self.last_window_

    check_predict_input(
        forecaster_name=type(self).__name__,
        steps=steps,
        is_fitted=self.is_fitted,
        exog_in_=False,
        index_type_=self.index_type_,
        index_freq_=self.index_freq_,
        window_size=self.window_size,
        last_window=last_window,
    )

    check_residuals_input(
        forecaster_name=type(self).__name__,
        use_in_sample_residuals=use_in_sample_residuals,
        in_sample_residuals_=self.in_sample_residuals_,
        out_sample_residuals_=self.out_sample_residuals_,
        use_binned_residuals=use_binned_residuals,
        in_sample_residuals_by_bin_=self.in_sample_residuals_by_bin_,
        out_sample_residuals_by_bin_=self.out_sample_residuals_by_bin_,
    )

    # Normalize `interval` into a nominal coverage in (0, 1): percentile
    # pairs are converted, floats are taken as-is.
    if isinstance(interval, (list, tuple)):
        check_interval(interval=interval, ensure_symmetric_intervals=True)
        nominal_coverage = (interval[1] - interval[0]) / 100
    else:
        check_interval(alpha=interval, alpha_literal="interval")
        nominal_coverage = interval

    if use_in_sample_residuals:
        residuals = self.in_sample_residuals_
        residuals_by_bin = self.in_sample_residuals_by_bin_
    else:
        residuals = self.out_sample_residuals_
        residuals_by_bin = self.out_sample_residuals_by_bin_

    prediction_index = expand_index(index=last_window.index, steps=steps)

    # Initialize to prevent use-before-initialization warnings
    predictions = None

    if isinstance(self.offset, int):

        # Same point-forecast logic as `predict` for an integer offset:
        # tile the last `offset` positions to cover `steps`.
        last_window_values = last_window.to_numpy(copy=True).ravel()
        equivalent_indexes = np.tile(
            np.arange(-self.offset, 0), int(np.ceil(steps / self.offset))
        )
        equivalent_indexes = equivalent_indexes[:steps]

        if self.n_offsets == 1:
            equivalent_values = last_window_values[equivalent_indexes]
            predictions = equivalent_values.ravel()

        if self.n_offsets > 1:
            equivalent_indexes = [
                equivalent_indexes - n * self.offset
                for n in np.arange(self.n_offsets)
            ]
            equivalent_indexes = np.vstack(equivalent_indexes)
            equivalent_values = last_window_values[equivalent_indexes]
            predictions = np.apply_along_axis(
                self.agg_func, axis=0, arr=equivalent_values
            )

    if isinstance(self.offset, pd.tseries.offsets.DateOffset):

        last_window = last_window.copy()
        max_allowed_date = last_window.index[-1]

        # For every date in prediction_index, calculate the n offsets
        offset_dates = []
        for date in prediction_index:
            selected_offsets = []
            while len(selected_offsets) < self.n_offsets:
                offset_date = date - self.offset
                if offset_date <= max_allowed_date:
                    selected_offsets.append(offset_date)
                date = offset_date
            offset_dates.append(selected_offsets)

        offset_dates = np.array(offset_dates)

        # Select the values of the time series corresponding to each
        # offset date. If the offset date is not in the time series, the
        # value is set to NaN.
        equivalent_values = (
            last_window.reindex(offset_dates.ravel())
            .to_numpy()
            .reshape(-1, self.n_offsets)
        )
        equivalent_values = pd.DataFrame(
            data=equivalent_values,
            index=prediction_index,
            columns=[f"offset_{i}" for i in range(self.n_offsets)],
        )

        # Error if all values are missing
        if equivalent_values.isnull().all().all():
            raise ValueError(
                f"All equivalent values are missing. This is caused by using "
                f"an offset ({self.offset}) larger than the available data. "
                f"Try to decrease the size of the offset ({self.offset}), "
                f"the number of `n_offsets` ({self.n_offsets}) or increase the "
                f"size of `last_window`. In backtesting, this error may be "
                f"caused by using an `initial_train_size` too small."
            )

        # Warning if equivalent values are missing
        incomplete_offsets = equivalent_values.isnull().any(axis=1)
        incomplete_offsets = incomplete_offsets[incomplete_offsets].index
        if not incomplete_offsets.empty:
            warnings.warn(
                f"Steps: {incomplete_offsets.strftime('%Y-%m-%d').to_list()} "
                f"are calculated with less than {self.n_offsets} `n_offsets`. "
                f"To avoid this, increase the `last_window` size or decrease "
                f"the number of `n_offsets`. The current configuration requires "
                f"a total offset of {self.offset * self.n_offsets}.",
                MissingValuesWarning,
            )

        aggregate_values = equivalent_values.apply(self.agg_func, axis=1)
        predictions = aggregate_values.to_numpy()

    # Conformal split correction: half-width is the `nominal_coverage`
    # quantile of the absolute residuals, either per bin or globally.
    if use_binned_residuals:
        correction_factor_by_bin = {
            k: np.quantile(np.abs(v), nominal_coverage)
            for k, v in residuals_by_bin.items()
        }
        replace_func = np.vectorize(lambda x: correction_factor_by_bin[x])
        predictions_bin = self.binner.transform(predictions)
        correction_factor = replace_func(predictions_bin)
    else:
        correction_factor = np.quantile(np.abs(residuals), nominal_coverage)

    lower_bound = predictions - correction_factor
    upper_bound = predictions + correction_factor
    predictions = np.column_stack([predictions, lower_bound, upper_bound])

    predictions = pd.DataFrame(
        data=predictions,
        index=prediction_index,
        columns=["pred", "lower_bound", "upper_bound"],
    )

    return predictions

set_in_sample_residuals(y, random_state=123, exog=None)

Set in-sample residuals in case they were not calculated during the training process.

In-sample residuals are calculated as the difference between the true values and the predictions made by the forecaster using the training data. The following internal attributes are updated:

  • in_sample_residuals_: residuals stored in a numpy ndarray.
  • binner_intervals_: intervals used to bin the residuals are calculated using the quantiles of the predicted values.
  • in_sample_residuals_by_bin_: residuals are binned according to the predicted value they are associated with and stored in a dictionary, where the keys are the intervals of the predicted values and the values are the residuals associated with that range.

A total of 10_000 residuals are stored in the attribute in_sample_residuals_. If the number of residuals is greater than 10_000, a random sample of 10_000 residuals is stored. The number of residuals stored per bin is limited to 10_000 // self.binner.n_bins_.

Parameters:

Name Type Description Default
y pandas Series

Training time series.

required
random_state int

Sets a seed to the random sampling for reproducible output. Defaults to 123.

123
exog Ignored

Not used, present here for API consistency by convention.

None

Returns:

Type Description
None

None

Raises:

Type Description
NotFittedError

If the forecaster has not been fitted.

IndexError

If the index range of y does not match the training range.

Examples:

>>> import pandas as pd
>>> import numpy as np
>>> from spotforecast2_safe.forecaster.recursive import ForecasterEquivalentDate
>>> data = pd.Series(
...     data=np.arange(14, dtype=float),
...     index=pd.date_range(start="2022-01-01", periods=14, freq="D"),
... )
>>> forecaster = ForecasterEquivalentDate(offset=7)
>>> forecaster.fit(y=data)
>>> # Recompute and store residuals if needed
>>> forecaster.set_in_sample_residuals(y=data, random_state=123)
>>> forecaster.in_sample_residuals_.shape
(7,)
Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_equivalent_date.py
def set_in_sample_residuals(
    self, y: pd.Series, random_state: int = 123, exog: Any = None
) -> None:
    """
    Compute and store in-sample residuals after the forecaster has been fitted.

    Residuals are the differences between the observed training values and
    the forecaster's predictions on the same data. Three internal attributes
    are refreshed:

    + `in_sample_residuals_`: residuals stored as a numpy ndarray.
    + `binner_intervals_`: binning intervals derived from the quantiles of
      the predicted values.
    + `in_sample_residuals_by_bin_`: dictionary mapping each predicted-value
      interval to the residuals that fall inside it.

    At most 10_000 residuals are kept in `in_sample_residuals_`; larger sets
    are randomly subsampled. Each bin keeps at most
    `10_000 // self.binner.n_bins_` residuals.

    Args:
        y (pandas Series): Training time series.
        random_state (int, optional): Seed for the random subsampling so
            results are reproducible. Defaults to 123.
        exog (Ignored): Not used, present here for API consistency by convention.

    Returns:
        None

    Raises:
        NotFittedError: If the forecaster has not been fitted.
        IndexError: If the index range of `y` does not match the training range.

    Examples:
        >>> import pandas as pd
        >>> import numpy as np
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterEquivalentDate
        >>> data = pd.Series(
        ...     data=np.arange(14, dtype=float),
        ...     index=pd.date_range(start="2022-01-01", periods=14, freq="D"),
        ... )
        >>> forecaster = ForecasterEquivalentDate(offset=7)
        >>> forecaster.fit(y=data)
        >>> forecaster.set_in_sample_residuals(y=data, random_state=123)
        >>> forecaster.in_sample_residuals_.shape
        (7,)

    """

    # Residuals can only be derived once the forecaster has been trained.
    if not self.is_fitted:
        raise NotFittedError(
            "This forecaster is not fitted yet. Call `fit` with appropriate "
            "arguments before using `set_in_sample_residuals()`."
        )

    check_y(y=y)

    # Extract the series index and compare its first/last values against
    # the range seen during training; a mismatch means `y` is not the
    # training series.
    y_index = check_extract_values_and_index(
        data=y, data_label="`y`", return_values=False
    )[1]
    observed_range = y_index[[0, -1]]
    if not observed_range.equals(self.training_range_):
        raise IndexError(
            f"The index range of `y` does not match the range "
            f"used during training. Please ensure the index is aligned "
            f"with the training data.\n"
            f"    Expected : {self.training_range_}\n"
            f"    Received : {observed_range}"
        )

    # Delegate the actual residual computation, subsampling and binning.
    self._binning_in_sample_residuals(
        y=y, store_in_sample_residuals=True, random_state=random_state
    )

set_out_sample_residuals(y_true, y_pred, append=False, random_state=123)

Set new values to the attribute out_sample_residuals_. Out of sample residuals are meant to be calculated using observations that did not participate in the training process. Two internal attributes are updated:

  • out_sample_residuals_: residuals stored in a numpy ndarray.
  • out_sample_residuals_by_bin_: residuals are binned according to the predicted value they are associated with and stored in a dictionary, where the keys are the intervals of the predicted values and the values are the residuals associated with that range. If a bin is empty, it is filled with a random sample of residuals from other bins. This is done to ensure that all bins have at least one residual and can be used in the prediction process.

A total of 10_000 residuals are stored in the attribute out_sample_residuals_. If the number of residuals is greater than 10_000, a random sample of 10_000 residuals is stored. The number of residuals stored per bin is limited to 10_000 // self.binner.n_bins_.

Parameters:

Name Type Description Default
y_true numpy ndarray, pandas Series

True values of the time series from which the residuals have been calculated.

required
y_pred numpy ndarray, pandas Series

Predicted values of the time series.

required
append bool

If True, new residuals are added to the ones already stored in the forecaster. If after appending the new residuals, the limit of 10_000 // self.binner.n_bins_ values per bin is reached, a random sample of residuals is stored. Defaults to False.

False
random_state int

Sets a seed to the random sampling for reproducible output. Defaults to 123.

123

Returns:

Type Description
None

None

Raises:

Type Description
NotFittedError

If the forecaster has not been fitted.

TypeError

If y_true or y_pred are not numpy arrays or pandas Series.

ValueError

If y_true and y_pred have different lengths.

ValueError

If y_true and y_pred are pandas Series with different indexes.

Examples:

>>> import pandas as pd
>>> import numpy as np
>>> from spotforecast2_safe.forecaster.recursive import ForecasterEquivalentDate
>>> data = pd.Series(
...     data=np.arange(21, dtype=float),
...     index=pd.date_range(start="2022-01-01", periods=21, freq="D"),
... )
>>> forecaster = ForecasterEquivalentDate(offset=7)
>>> forecaster.fit(y=data)
>>> preds = forecaster.predict(steps=7)
>>> y_true = pd.Series(data[-7:].to_numpy(), index=preds.index)
>>> forecaster.set_out_sample_residuals(y_true=y_true, y_pred=preds)
>>> forecaster.out_sample_residuals_.shape
(7,)
Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_equivalent_date.py
def set_out_sample_residuals(
    self,
    y_true: np.ndarray | pd.Series,
    y_pred: np.ndarray | pd.Series,
    append: bool = False,
    random_state: int = 123,
) -> None:
    """
    Set new values to the attribute `out_sample_residuals_`. Out of sample
    residuals are meant to be calculated using observations that did not
    participate in the training process. Two internal attributes are updated:

    + `out_sample_residuals_`: residuals stored in a numpy ndarray.
    + `out_sample_residuals_by_bin_`: residuals are binned according to the
    predicted value they are associated with and stored in a dictionary, where
    the keys are the intervals of the predicted values and the values are
    the residuals associated with that range. If a bin is empty, it
    is filled with a random sample of residuals from other bins. This is done
    to ensure that all bins have at least one residual and can be used in the
    prediction process.

    A total of 10_000 residuals are stored in the attribute `out_sample_residuals_`.
    If the number of residuals is greater than 10_000, a random sample of
    10_000 residuals is stored. The number of residuals stored per bin is
    limited to `10_000 // self.binner.n_bins_`.

    Args:
        y_true (numpy ndarray, pandas Series): True values of the time series
            from which the residuals have been calculated.
        y_pred (numpy ndarray, pandas Series): Predicted values of the time series.
        append (bool, optional): If `True`, new residuals are added to the ones
            already stored in the forecaster. If after appending the new
            residuals, the limit of `10_000 // self.binner.n_bins_` values per
            bin is reached, a random sample of residuals is stored. Defaults
            to False.
        random_state (int, optional): Sets a seed to the random sampling for
            reproducible output. Defaults to 123.

    Returns:
        None

    Raises:
        NotFittedError: If the forecaster has not been fitted.
        TypeError: If `y_true` or `y_pred` are not numpy arrays or pandas Series.
        ValueError: If `y_true` and `y_pred` have different lengths.
        ValueError: If `y_true` and `y_pred` are pandas Series with different indexes.

    Examples:
        >>> import pandas as pd
        >>> import numpy as np
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterEquivalentDate
        >>> data = pd.Series(
        ...     data=np.arange(21, dtype=float),
        ...     index=pd.date_range(start="2022-01-01", periods=21, freq="D"),
        ... )
        >>> forecaster = ForecasterEquivalentDate(offset=7)
        >>> forecaster.fit(y=data)
        >>> preds = forecaster.predict(steps=7)
        >>> y_true = pd.Series(data[-7:].to_numpy(), index=preds.index)
        >>> forecaster.set_out_sample_residuals(y_true=y_true, y_pred=preds)
        >>> forecaster.out_sample_residuals_.shape
        (7,)

    """

    # The binner and its intervals are created during fit; residual binning
    # is meaningless on an unfitted forecaster.
    if not self.is_fitted:
        raise NotFittedError(
            "This forecaster is not fitted yet. Call `fit` with appropriate "
            "arguments before using `set_out_sample_residuals()`."
        )

    if not isinstance(y_true, (np.ndarray, pd.Series)):
        raise TypeError(
            f"`y_true` argument must be `numpy ndarray` or `pandas Series`. "
            f"Got {type(y_true)}."
        )

    if not isinstance(y_pred, (np.ndarray, pd.Series)):
        raise TypeError(
            f"`y_pred` argument must be `numpy ndarray` or `pandas Series`. "
            f"Got {type(y_pred)}."
        )

    if len(y_true) != len(y_pred):
        raise ValueError(
            f"`y_true` and `y_pred` must have the same length. "
            f"Got {len(y_true)} and {len(y_pred)}."
        )

    # When both inputs are Series their indexes must align element-wise,
    # otherwise the residuals would pair unrelated observations.
    if isinstance(y_true, pd.Series) and isinstance(y_pred, pd.Series):
        if not y_true.index.equals(y_pred.index):
            raise ValueError("`y_true` and `y_pred` must have the same index.")

    # Work with plain numpy arrays from here on.
    if not isinstance(y_pred, np.ndarray):
        y_pred = y_pred.to_numpy()
    if not isinstance(y_true, np.ndarray):
        y_true = y_true.to_numpy()

    # Pair each prediction with its residual and drop rows containing NaN
    # so they do not pollute the stored residual pools.
    data = pd.DataFrame(
        {"prediction": y_pred, "residuals": y_true - y_pred}
    ).dropna()
    y_pred = data["prediction"].to_numpy()
    residuals = data["residuals"].to_numpy()

    # Assign each residual to the bin of its predicted value and group.
    data["bin"] = self.binner.transform(y_pred).astype(int)
    residuals_by_bin = data.groupby("bin")["residuals"].apply(np.array).to_dict()

    # Start from the previously stored residuals (or empty containers) so
    # `append=True` can extend them.
    out_sample_residuals = (
        np.array([])
        if self.out_sample_residuals_ is None
        else self.out_sample_residuals_
    )
    out_sample_residuals_by_bin = (
        {}
        if self.out_sample_residuals_by_bin_ is None
        else self.out_sample_residuals_by_bin_
    )
    if append:
        # Merge new residuals into the existing pools, per bin.
        out_sample_residuals = np.concatenate([out_sample_residuals, residuals])
        for k, v in residuals_by_bin.items():
            if k in out_sample_residuals_by_bin:
                out_sample_residuals_by_bin[k] = np.concatenate(
                    (out_sample_residuals_by_bin[k], v)
                )
            else:
                out_sample_residuals_by_bin[k] = v
    else:
        # Replace any previously stored residuals.
        out_sample_residuals = residuals
        out_sample_residuals_by_bin = residuals_by_bin

    # Cap each bin at its share of the 10_000 global budget; oversized bins
    # are randomly subsampled (seeded for reproducibility).
    max_samples = 10_000 // self.binner.n_bins_
    rng = np.random.default_rng(seed=random_state)
    for k, v in out_sample_residuals_by_bin.items():
        if len(v) > max_samples:
            sample = rng.choice(a=v, size=max_samples, replace=False)
            out_sample_residuals_by_bin[k] = sample

    # Make sure every training-time bin key exists, even if no predicted
    # value fell into it.
    bin_keys = (
        [] if self.binner_intervals_ is None else self.binner_intervals_.keys()
    )
    for k in bin_keys:
        if k not in out_sample_residuals_by_bin:
            out_sample_residuals_by_bin[k] = np.array([])

    # Fill empty bins with a random sample drawn from the overall residual
    # pool so every bin can be used during probabilistic prediction.
    empty_bins = [k for k, v in out_sample_residuals_by_bin.items() if v.size == 0]
    if empty_bins:
        warnings.warn(
            f"The following bins have no out of sample residuals: {empty_bins}. "
            f"No predicted values fall in the interval "
            f"{[self.binner_intervals_[bin] for bin in empty_bins]}. "
            f"Empty bins will be filled with a random sample of residuals.",
            ResidualsUsageWarning,
        )
        empty_bin_size = min(max_samples, len(out_sample_residuals))
        for k in empty_bins:
            out_sample_residuals_by_bin[k] = rng.choice(
                a=out_sample_residuals, size=empty_bin_size, replace=False
            )

    # Enforce the global 10_000-residual cap on the flat pool.
    if len(out_sample_residuals) > 10_000:
        out_sample_residuals = rng.choice(
            a=out_sample_residuals, size=10_000, replace=False
        )

    self.out_sample_residuals_ = out_sample_residuals
    self.out_sample_residuals_by_bin_ = out_sample_residuals_by_bin

summary()

Show forecaster information.

Returns:

Type Description
None

None

Examples:

>>> import pandas as pd
>>> import numpy as np
>>> from spotforecast2_safe.forecaster.recursive import ForecasterEquivalentDate
>>> data = pd.Series(
...     data=np.arange(14, dtype=float),
...     index=pd.date_range(start="2022-01-01", periods=14, freq="D"),
... )
>>> forecaster = ForecasterEquivalentDate(offset=7)
>>> forecaster.fit(y=data)
>>> forecaster.summary()
============================
Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_equivalent_date.py
def summary(self) -> None:
    """
    Print the forecaster's information (its string representation) to stdout.

    Returns:
        None

    Examples:
        >>> import pandas as pd
        >>> import numpy as np
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterEquivalentDate
        >>> data = pd.Series(
        ...     data=np.arange(14, dtype=float),
        ...     index=pd.date_range(start="2022-01-01", periods=14, freq="D"),
        ... )
        >>> forecaster = ForecasterEquivalentDate(offset=7)
        >>> forecaster.fit(y=data)
        >>> forecaster.summary()  # doctest: +ELLIPSIS
        ============================
    """

    # Delegate to __str__/__repr__ via print.
    print(self)

ForecasterRecursive

Bases: ForecasterBase

Recursive autoregressive forecaster for scikit-learn compatible estimators.

This class turns any estimator compatible with the scikit-learn API into a recursive autoregressive (multi-step) forecaster. The forecaster learns to predict future values by using lagged values of the target variable and optional exogenous features. Predictions are made iteratively, where each step uses previous predictions as input for the next step (recursive strategy).

Parameters:

Name Type Description Default
estimator object

Scikit-learn compatible estimator for regression. If None, a default estimator will be initialized. Can also be passed via regressor parameter.

None
lags Union[int, List[int], ndarray, range, None]

Lagged values of the target variable to use as predictors. Can be an integer (uses lags from 1 to lags), list of integers, numpy array, or range. At least one of lags or window_features must be provided. Defaults to None.

None
window_features Union[object, List[object], None]

List of window feature objects to compute features from the target variable. Each object must implement transform_batch() method. At least one of lags or window_features must be provided. Defaults to None.

None
transformer_y Optional[object]

Transformer object for the target variable. Must implement fit() and transform() methods. Applied before training and predictions. Defaults to None.

None
transformer_exog Optional[object]

Transformer object for exogenous variables. Must implement fit() and transform() methods. Applied before training and predictions. Defaults to None.

None
weight_func Optional[Callable]

Function to compute sample weights for training. Must accept an index and return an array of weights. Defaults to None.

None
differentiation Optional[int]

Order of differencing to apply to the target variable. Must be a positive integer. Differencing is applied before creating lags. Defaults to None.

None
fit_kwargs Optional[Dict[str, object]]

Dictionary of additional keyword arguments to pass to the estimator's fit() method. Defaults to None.

None
binner_kwargs Optional[Dict[str, object]]

Dictionary of keyword arguments for QuantileBinner used in probabilistic predictions. Defaults to {'n_bins': 10, 'method': 'linear'}.

None
forecaster_id Union[str, int, None]

Identifier for the forecaster instance. Can be a string or integer. Used for tracking and logging purposes. Defaults to None.

None
regressor object

Alternative parameter name for estimator. If provided, used instead of estimator. Defaults to None.

None

Attributes:

Name Type Description
estimator

Fitted scikit-learn estimator.

lags

Lag indices used in the model.

lags_names

Names of lag features (e.g., ['lag_1', 'lag_2']).

window_features

List of window feature transformers.

window_features_names

Names of window features.

window_size

Maximum window size needed (max of lags and window features).

transformer_y

Transformer for target variable.

transformer_exog

Transformer for exogenous variables.

weight_func

Function for sample weighting.

differentiation

Order of differencing applied.

differentiator

TimeSeriesDifferentiator instance if differencing is used.

is_fitted

Boolean indicating if forecaster has been fitted.

fit_date

Timestamp of the last fit operation.

last_window_

Last window_size observations from training data.

index_type_

Type of index in training data (RangeIndex or DatetimeIndex).

index_freq_

Frequency of DatetimeIndex if applicable.

training_range_

First and last index values of training data.

series_name_in_

Name of the target series.

exog_in_

Boolean indicating if exogenous variables were used in training.

exog_names_in_

Names of exogenous variables.

exog_type_in_

Type of exogenous input (Series or DataFrame).

X_train_features_names_out_

Names of all training features.

in_sample_residuals_

Residuals from training set.

in_sample_residuals_by_bin_

Residuals grouped by bins for probabilistic prediction.

forecaster_id

Identifier for the forecaster instance.

Note
  • Either lags or window_features (or both) must be provided during initialization.
  • The forecaster uses a recursive strategy where each multi-step prediction depends on previous predictions within the same forecast horizon.
  • Exogenous variables must have the same index as the target variable and must be available for the entire prediction horizon.
  • The forecaster supports point predictions, prediction intervals, bootstrapping, quantile predictions, and probabilistic forecasts via conformal methods.

Examples:

Create a basic forecaster with lags:

>>> import numpy as np
>>> import pandas as pd
>>> from sklearn.linear_model import LinearRegression
>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> y = pd.Series(np.random.randn(100), name='y')
>>> forecaster = ForecasterRecursive(
...     estimator=LinearRegression(),
...     lags=10
... )
>>> forecaster.fit(y)
>>> predictions = forecaster.predict(steps=5)

Create a forecaster with window features and transformations:

>>> from sklearn.ensemble import RandomForestRegressor
>>> from sklearn.preprocessing import StandardScaler
>>> from spotforecast2_safe.preprocessing import RollingFeatures
>>> import pandas as pd
>>> y = pd.Series(np.random.randn(100), name='y')
>>> forecaster = ForecasterRecursive(
...     estimator=RandomForestRegressor(n_estimators=100),
...     lags=[1, 7, 30],
...     window_features=[RollingFeatures(stats='mean', window_sizes=7)],
...     transformer_y=StandardScaler(),
...     differentiation=1
... )
>>> forecaster.fit(y)
>>> predictions = forecaster.predict(steps=10)

Create a forecaster with exogenous variables:

>>> import pandas as pd
>>> from sklearn.linear_model import Ridge
>>> y = pd.Series(np.random.randn(100), name='target')
>>> exog = pd.DataFrame({'temp': np.random.randn(100)}, index=y.index)
>>> forecaster = ForecasterRecursive(
...     estimator=Ridge(),
...     lags=7,
...     forecaster_id='my_forecaster'
... )
>>> forecaster.fit(y, exog)
>>> exog_future = pd.DataFrame(
...     {'temp': np.random.randn(5)},
...     index=pd.RangeIndex(start=100, stop=105)
... )
>>> predictions = forecaster.predict(steps=5, exog=exog_future)

Create a forecaster with probabilistic prediction configuration:

>>> from sklearn.ensemble import GradientBoostingRegressor
>>> import pandas as pd
>>> y = pd.Series(np.random.randn(100), name='y')
>>> forecaster = ForecasterRecursive(
...     estimator=GradientBoostingRegressor(),
...     lags=14,
...     binner_kwargs={'n_bins': 15, 'method': 'linear'}
... )
>>> forecaster.fit(y, store_in_sample_residuals=True)
>>> predictions = forecaster.predict(steps=5)
Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
  52
  53
  54
  55
  56
  57
  58
  59
  60
  61
  62
  63
  64
  65
  66
  67
  68
  69
  70
  71
  72
  73
  74
  75
  76
  77
  78
  79
  80
  81
  82
  83
  84
  85
  86
  87
  88
  89
  90
  91
  92
  93
  94
  95
  96
  97
  98
  99
 100
 101
 102
 103
 104
 105
 106
 107
 108
 109
 110
 111
 112
 113
 114
 115
 116
 117
 118
 119
 120
 121
 122
 123
 124
 125
 126
 127
 128
 129
 130
 131
 132
 133
 134
 135
 136
 137
 138
 139
 140
 141
 142
 143
 144
 145
 146
 147
 148
 149
 150
 151
 152
 153
 154
 155
 156
 157
 158
 159
 160
 161
 162
 163
 164
 165
 166
 167
 168
 169
 170
 171
 172
 173
 174
 175
 176
 177
 178
 179
 180
 181
 182
 183
 184
 185
 186
 187
 188
 189
 190
 191
 192
 193
 194
 195
 196
 197
 198
 199
 200
 201
 202
 203
 204
 205
 206
 207
 208
 209
 210
 211
 212
 213
 214
 215
 216
 217
 218
 219
 220
 221
 222
 223
 224
 225
 226
 227
 228
 229
 230
 231
 232
 233
 234
 235
 236
 237
 238
 239
 240
 241
 242
 243
 244
 245
 246
 247
 248
 249
 250
 251
 252
 253
 254
 255
 256
 257
 258
 259
 260
 261
 262
 263
 264
 265
 266
 267
 268
 269
 270
 271
 272
 273
 274
 275
 276
 277
 278
 279
 280
 281
 282
 283
 284
 285
 286
 287
 288
 289
 290
 291
 292
 293
 294
 295
 296
 297
 298
 299
 300
 301
 302
 303
 304
 305
 306
 307
 308
 309
 310
 311
 312
 313
 314
 315
 316
 317
 318
 319
 320
 321
 322
 323
 324
 325
 326
 327
 328
 329
 330
 331
 332
 333
 334
 335
 336
 337
 338
 339
 340
 341
 342
 343
 344
 345
 346
 347
 348
 349
 350
 351
 352
 353
 354
 355
 356
 357
 358
 359
 360
 361
 362
 363
 364
 365
 366
 367
 368
 369
 370
 371
 372
 373
 374
 375
 376
 377
 378
 379
 380
 381
 382
 383
 384
 385
 386
 387
 388
 389
 390
 391
 392
 393
 394
 395
 396
 397
 398
 399
 400
 401
 402
 403
 404
 405
 406
 407
 408
 409
 410
 411
 412
 413
 414
 415
 416
 417
 418
 419
 420
 421
 422
 423
 424
 425
 426
 427
 428
 429
 430
 431
 432
 433
 434
 435
 436
 437
 438
 439
 440
 441
 442
 443
 444
 445
 446
 447
 448
 449
 450
 451
 452
 453
 454
 455
 456
 457
 458
 459
 460
 461
 462
 463
 464
 465
 466
 467
 468
 469
 470
 471
 472
 473
 474
 475
 476
 477
 478
 479
 480
 481
 482
 483
 484
 485
 486
 487
 488
 489
 490
 491
 492
 493
 494
 495
 496
 497
 498
 499
 500
 501
 502
 503
 504
 505
 506
 507
 508
 509
 510
 511
 512
 513
 514
 515
 516
 517
 518
 519
 520
 521
 522
 523
 524
 525
 526
 527
 528
 529
 530
 531
 532
 533
 534
 535
 536
 537
 538
 539
 540
 541
 542
 543
 544
 545
 546
 547
 548
 549
 550
 551
 552
 553
 554
 555
 556
 557
 558
 559
 560
 561
 562
 563
 564
 565
 566
 567
 568
 569
 570
 571
 572
 573
 574
 575
 576
 577
 578
 579
 580
 581
 582
 583
 584
 585
 586
 587
 588
 589
 590
 591
 592
 593
 594
 595
 596
 597
 598
 599
 600
 601
 602
 603
 604
 605
 606
 607
 608
 609
 610
 611
 612
 613
 614
 615
 616
 617
 618
 619
 620
 621
 622
 623
 624
 625
 626
 627
 628
 629
 630
 631
 632
 633
 634
 635
 636
 637
 638
 639
 640
 641
 642
 643
 644
 645
 646
 647
 648
 649
 650
 651
 652
 653
 654
 655
 656
 657
 658
 659
 660
 661
 662
 663
 664
 665
 666
 667
 668
 669
 670
 671
 672
 673
 674
 675
 676
 677
 678
 679
 680
 681
 682
 683
 684
 685
 686
 687
 688
 689
 690
 691
 692
 693
 694
 695
 696
 697
 698
 699
 700
 701
 702
 703
 704
 705
 706
 707
 708
 709
 710
 711
 712
 713
 714
 715
 716
 717
 718
 719
 720
 721
 722
 723
 724
 725
 726
 727
 728
 729
 730
 731
 732
 733
 734
 735
 736
 737
 738
 739
 740
 741
 742
 743
 744
 745
 746
 747
 748
 749
 750
 751
 752
 753
 754
 755
 756
 757
 758
 759
 760
 761
 762
 763
 764
 765
 766
 767
 768
 769
 770
 771
 772
 773
 774
 775
 776
 777
 778
 779
 780
 781
 782
 783
 784
 785
 786
 787
 788
 789
 790
 791
 792
 793
 794
 795
 796
 797
 798
 799
 800
 801
 802
 803
 804
 805
 806
 807
 808
 809
 810
 811
 812
 813
 814
 815
 816
 817
 818
 819
 820
 821
 822
 823
 824
 825
 826
 827
 828
 829
 830
 831
 832
 833
 834
 835
 836
 837
 838
 839
 840
 841
 842
 843
 844
 845
 846
 847
 848
 849
 850
 851
 852
 853
 854
 855
 856
 857
 858
 859
 860
 861
 862
 863
 864
 865
 866
 867
 868
 869
 870
 871
 872
 873
 874
 875
 876
 877
 878
 879
 880
 881
 882
 883
 884
 885
 886
 887
 888
 889
 890
 891
 892
 893
 894
 895
 896
 897
 898
 899
 900
 901
 902
 903
 904
 905
 906
 907
 908
 909
 910
 911
 912
 913
 914
 915
 916
 917
 918
 919
 920
 921
 922
 923
 924
 925
 926
 927
 928
 929
 930
 931
 932
 933
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
class ForecasterRecursive(ForecasterBase):
    """
    Recursive autoregressive forecaster for scikit-learn compatible estimators.

    This class turns any estimator compatible with the scikit-learn API into a
    recursive autoregressive (multi-step) forecaster. The forecaster learns to predict
    future values by using lagged values of the target variable and optional exogenous
    features. Predictions are made iteratively, where each step uses previous predictions
    as input for the next step (recursive strategy).

    Args:
        estimator: Scikit-learn compatible estimator for regression. If None, a default
            estimator will be initialized. Can also be passed via regressor parameter.
        lags: Lagged values of the target variable to use as predictors. Can be an
            integer (uses lags from 1 to lags), list of integers, numpy array, or range.
            At least one of lags or window_features must be provided. Defaults to None.
        window_features: List of window feature objects to compute features from the
            target variable. Each object must implement transform_batch() method.
            At least one of lags or window_features must be provided. Defaults to None.
        transformer_y: Transformer object for the target variable. Must implement fit()
            and transform() methods. Applied before training and predictions.
            Defaults to None.
        transformer_exog: Transformer object for exogenous variables. Must implement
            fit() and transform() methods. Applied before training and predictions.
            Defaults to None.
        weight_func: Function to compute sample weights for training. Must accept an
            index and return an array of weights. Defaults to None.
        differentiation: Order of differencing to apply to the target variable.
            Must be a positive integer. Differencing is applied before creating lags.
            Defaults to None.
        fit_kwargs: Dictionary of additional keyword arguments to pass to the estimator's
            fit() method. Defaults to None.
        binner_kwargs: Dictionary of keyword arguments for QuantileBinner used in
            probabilistic predictions. Defaults to {'n_bins': 10, 'method': 'linear'}.
        forecaster_id: Identifier for the forecaster instance. Can be a string or
            integer. Used for tracking and logging purposes. Defaults to None.
        regressor: Alternative parameter name for estimator. If provided, used instead
            of estimator. Defaults to None.

    Attributes:
        estimator: Fitted scikit-learn estimator.
        lags: Lag indices used in the model.
        lags_names: Names of lag features (e.g., ['lag_1', 'lag_2']).
        window_features: List of window feature transformers.
        window_features_names: Names of window features.
        window_size: Maximum window size needed (max of lags and window features).
        transformer_y: Transformer for target variable.
        transformer_exog: Transformer for exogenous variables.
        weight_func: Function for sample weighting.
        differentiation: Order of differencing applied.
        differentiator: TimeSeriesDifferentiator instance if differencing is used.
        is_fitted: Boolean indicating if forecaster has been fitted.
        fit_date: Timestamp of the last fit operation.
        last_window_: Last window_size observations from training data.
        index_type_: Type of index in training data (RangeIndex or DatetimeIndex).
        index_freq_: Frequency of DatetimeIndex if applicable.
        training_range_: First and last index values of training data.
        series_name_in_: Name of the target series.
        exog_in_: Boolean indicating if exogenous variables were used in training.
        exog_names_in_: Names of exogenous variables.
        exog_type_in_: Type of exogenous input (Series or DataFrame).
        X_train_features_names_out_: Names of all training features.
        in_sample_residuals_: Residuals from training set.
        in_sample_residuals_by_bin_: Residuals grouped by bins for probabilistic pred.
        forecaster_id: Identifier for the forecaster instance.

    Note:
        - Either lags or window_features (or both) must be provided during initialization.
        - The forecaster uses a recursive strategy where each multi-step prediction
          depends on previous predictions within the same forecast horizon.
        - Exogenous variables must have the same index as the target variable and must
          be available for the entire prediction horizon.
        - The forecaster supports point predictions, prediction intervals, bootstrapping,
          quantile predictions, and probabilistic forecasts via conformal methods.

    Examples:
        Create a basic forecaster with lags:

        >>> import numpy as np
        >>> import pandas as pd
        >>> from sklearn.linear_model import LinearRegression
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> y = pd.Series(np.random.randn(100), name='y')
        >>> forecaster = ForecasterRecursive(
        ...     estimator=LinearRegression(),
        ...     lags=10
        ... )
        >>> forecaster.fit(y)
        >>> predictions = forecaster.predict(steps=5)

        Create a forecaster with window features and transformations:

        >>> from sklearn.ensemble import RandomForestRegressor
        >>> from sklearn.preprocessing import StandardScaler
        >>> from spotforecast2_safe.preprocessing import RollingFeatures
        >>> import pandas as pd
        >>> y = pd.Series(np.random.randn(100), name='y')
        >>> forecaster = ForecasterRecursive(
        ...     estimator=RandomForestRegressor(n_estimators=100),
        ...     lags=[1, 7, 30],
        ...     window_features=[RollingFeatures(stats='mean', window_sizes=7)],
        ...     transformer_y=StandardScaler(),
        ...     differentiation=1
        ... )
        >>> forecaster.fit(y)
        >>> predictions = forecaster.predict(steps=10)

        Create a forecaster with exogenous variables:

        >>> import pandas as pd
        >>> from sklearn.linear_model import Ridge
        >>> y = pd.Series(np.random.randn(100), name='target')
        >>> exog = pd.DataFrame({'temp': np.random.randn(100)}, index=y.index)
        >>> forecaster = ForecasterRecursive(
        ...     estimator=Ridge(),
        ...     lags=7,
        ...     forecaster_id='my_forecaster'
        ... )
        >>> forecaster.fit(y, exog)
        >>> exog_future = pd.DataFrame(
        ...     {'temp': np.random.randn(5)},
        ...     index=pd.RangeIndex(start=100, stop=105)
        ... )
        >>> predictions = forecaster.predict(steps=5, exog=exog_future)

        Create a forecaster with probabilistic prediction configuration:

        >>> from sklearn.ensemble import GradientBoostingRegressor
        >>> import pandas as pd
        >>> y = pd.Series(np.random.randn(100), name='y')
        >>> forecaster = ForecasterRecursive(
        ...     estimator=GradientBoostingRegressor(),
        ...     lags=14,
        ...     binner_kwargs={'n_bins': 15, 'method': 'linear'}
        ... )
        >>> forecaster.fit(y, store_in_sample_residuals=True)
        >>> predictions = forecaster.predict(steps=5)
    """

    def __init__(
        self,
        estimator: object = None,
        lags: Union[int, List[int], np.ndarray, range, None] = None,
        window_features: Union[object, List[object], None] = None,
        transformer_y: Optional[object] = None,
        transformer_exog: Optional[object] = None,
        weight_func: Optional[Callable] = None,
        differentiation: Optional[int] = None,
        fit_kwargs: Optional[Dict[str, object]] = None,
        binner_kwargs: Optional[Dict[str, object]] = None,
        forecaster_id: Union[str, int, None] = None,
        regressor: object = None,
    ) -> None:
        """
        Initialize a ForecasterRecursive instance.

        See the class docstring for a detailed description of every argument.

        Raises:
            ValueError: If both `lags` and `window_features` are None.
            ValueError: If `differentiation` is provided and is not an
                integer equal to or greater than 1.
        """

        # Resolve the `estimator` / `regressor` alias up front so that every
        # later validation operates on the estimator actually stored.
        self.estimator = copy(initialize_estimator(estimator, regressor))
        self.transformer_y = transformer_y
        self.transformer_exog = transformer_exog
        self.weight_func = weight_func  # re-validated below by initialize_weights
        self.source_code_weight_func = None
        self.differentiation = differentiation
        self.differentiation_max = None
        self.differentiator = None
        # Attributes populated during fit().
        self.last_window_ = None
        self.index_type_ = None
        self.index_freq_ = None
        self.training_range_ = None
        self.series_name_in_ = None
        self.exog_in_ = False
        self.exog_names_in_ = None
        self.exog_type_in_ = None
        self.exog_dtypes_in_ = None
        self.exog_dtypes_out_ = None
        self.X_train_window_features_names_out_ = None
        self.X_train_exog_names_out_ = None
        self.X_train_features_names_out_ = None
        # Residual stores used by the probabilistic prediction methods.
        self.in_sample_residuals_ = None
        self.out_sample_residuals_ = None
        self.in_sample_residuals_by_bin_ = None
        self.out_sample_residuals_by_bin_ = None
        self.creation_date = pd.Timestamp.today().strftime("%Y-%m-%d %H:%M:%S")
        self.is_fitted = False
        self.fit_date = None
        try:
            from spotforecast2_safe import __version__

            self.spotforecast_version = __version__
        except ImportError:
            self.spotforecast_version = "unknown"
        self.python_version = sys.version.split(" ")[0]
        self.forecaster_id = forecaster_id
        self._probabilistic_mode = "binned"

        (
            self.lags,
            self.lags_names,
            self.max_lag,
        ) = initialize_lags(type(self).__name__, lags)
        (
            self.window_features,
            self.window_features_names,
            self.max_size_window_features,
        ) = initialize_window_features(window_features)
        if self.window_features is None and self.lags is None:
            raise ValueError(
                "At least one of the arguments `lags` or `window_features` "
                "must be different from None. This is required to create the "
                "predictors used in training the forecaster."
            )

        # Enough history is needed to cover the largest lag and the largest
        # window feature, whichever is bigger.
        self.window_size = max(
            [
                ws
                for ws in [self.max_lag, self.max_size_window_features]
                if ws is not None
            ]
        )
        self.window_features_class_names = None
        if window_features is not None:
            self.window_features_class_names = [
                type(wf).__name__ for wf in self.window_features
            ]

        # FIX: pass the resolved `self.estimator` (not the raw `estimator`
        # argument, which is None when the `regressor` alias is used) so the
        # weight-function validation inspects the estimator actually in use.
        self.weight_func, self.source_code_weight_func, _ = initialize_weights(
            forecaster_name=type(self).__name__,
            estimator=self.estimator,
            weight_func=weight_func,
            series_weights=None,
        )

        if differentiation is not None:
            if not isinstance(differentiation, int) or differentiation < 1:
                raise ValueError(
                    f"Argument `differentiation` must be an integer equal to or "
                    f"greater than 1. Got {differentiation}."
                )
            self.differentiation = differentiation
            self.differentiation_max = differentiation
            # Differencing consumes `differentiation` extra observations.
            self.window_size += differentiation
            self.differentiator = TimeSeriesDifferentiator(
                order=differentiation  # , window_size=self.window_size # TODO: TimeSeriesDifferentiator in preprocessing created only takes order, add window_size if needed
            )

        # FIX (same rationale as above): validate fit_kwargs against the
        # resolved estimator so the `regressor` alias is honoured.
        self.fit_kwargs = check_select_fit_kwargs(
            estimator=self.estimator, fit_kwargs=fit_kwargs
        )

        self.binner_kwargs = binner_kwargs
        if binner_kwargs is None:
            self.binner_kwargs = {
                "n_bins": 10,
                "method": "linear",
            }
        self.binner = QuantileBinner(**self.binner_kwargs)
        self.binner_intervals_ = None

        self.__spotforecast_tags__ = {
            "library": "spotforecast",
            "forecaster_name": "ForecasterRecursive",
            "forecaster_task": "regression",
            "forecasting_scope": "single-series",  # single-series | global
            "forecasting_strategy": "recursive",  # recursive | direct | deep_learning
            "index_types_supported": ["pandas.RangeIndex", "pandas.DatetimeIndex"],
            "requires_index_frequency": True,
            "allowed_input_types_series": ["pandas.Series"],
            "supports_exog": True,
            "allowed_input_types_exog": ["pandas.Series", "pandas.DataFrame"],
            "handles_missing_values_series": False,
            "handles_missing_values_exog": True,
            "supports_lags": True,
            "supports_window_features": True,
            "supports_transformer_series": True,
            "supports_transformer_exog": True,
            "supports_weight_func": True,
            "supports_differentiation": True,
            "prediction_types": [
                "point",
                "interval",
                "bootstrapping",
                "quantiles",
                "distribution",
            ],
            "supports_probabilistic": True,
            "probabilistic_methods": ["bootstrapping", "conformal"],
            "handles_binned_residuals": True,
        }

    def __repr__(self) -> str:
        """
        Return a plain-text summary of the forecaster configuration and state.

        Returns:
            str: Multi-line string with the forecaster's key configuration
            (estimator, lags, window features, transformers) and fit state
            (training range, index type/frequency, fit date).
        """

        name = type(self).__name__
        estimator_params = (
            self.estimator.get_params() if hasattr(self.estimator, "get_params") else {}
        )
        exog_names = self.exog_names_in_ if self.exog_in_ else None

        # Training metadata is only meaningful once the forecaster is fitted.
        if self.is_fitted:
            training_range = self.training_range_.to_list()
            index_type = str(self.index_type_).split(".")[-1][:-2]
            index_freq = self.index_freq_
        else:
            training_range = None
            index_type = None
            index_freq = None

        rows = [
            "=" * len(name),
            name,
            "=" * len(name),
            f"Estimator: {type(self.estimator).__name__}",
            f"Lags: {self.lags}",
            f"Window features: {self.window_features_names}",
            f"Window size: {self.window_size}",
            f"Series name: {self.series_name_in_}",
            f"Exogenous included: {self.exog_in_}",
            f"Exogenous names: {exog_names}",
            f"Transformer for y: {self.transformer_y}",
            f"Transformer for exog: {self.transformer_exog}",
            f"Weight function included: {self.weight_func is not None}",
            f"Differentiation order: {self.differentiation}",
            f"Training range: {training_range}",
            f"Training index type: {index_type}",
            f"Training index frequency: {index_freq}",
            f"Estimator parameters: {estimator_params}",
            f"fit_kwargs: {self.fit_kwargs}",
            f"Creation date: {self.creation_date}",
            f"Last fit date: {self.fit_date}",
            f"spotforecast version: {self.spotforecast_version}",
            f"Python version: {self.python_version}",
            f"Forecaster id: {self.forecaster_id}",
        ]

        # Every line in the original format ends with " \n", including the last.
        return "".join(f"{row} \n" for row in rows)

    def _repr_html_(self) -> str:
        """
        HTML representation of the object, rendered by Jupyter/IPython.
        The "General Information" section is expanded by default; all other
        sections are collapsed `<details>` elements.

        Returns:
            HTML string representation of the forecaster (CSS style block
            followed by the content markup).

        Examples:
            >>> from sklearn.linear_model import LinearRegression
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
            >>> forecaster._repr_html_()  # doctest: +ELLIPSIS
            '<div class="container-...">...</div>'
        """

        # Estimators without get_params (non-sklearn objects) fall back to {}.
        params = (
            self.estimator.get_params() if hasattr(self.estimator, "get_params") else {}
        )
        # Exogenous names are only reported when exog was used during training.
        exog_names_in_ = self.exog_names_in_ if self.exog_in_ else None

        # `unique_id` scopes the CSS classes so several repr blocks can
        # coexist in one notebook without style collisions.
        style, unique_id = get_style_repr_html(self.is_fitted)

        content = f"""
        <div class="container-{unique_id}">
            <p style="font-size: 1.5em; font-weight: bold; margin-block-start: 0.83em; margin-block-end: 0.83em;">{type(self).__name__}</p>
            <details open>
                <summary>General Information</summary>
                <ul>
                    <li><strong>Estimator:</strong> {type(self.estimator).__name__}</li>
                    <li><strong>Lags:</strong> {self.lags}</li>
                    <li><strong>Window features:</strong> {self.window_features_names}</li>
                    <li><strong>Window size:</strong> {self.window_size}</li>
                    <li><strong>Series name:</strong> {self.series_name_in_}</li>
                    <li><strong>Exogenous included:</strong> {self.exog_in_}</li>
                    <li><strong>Weight function included:</strong> {self.weight_func is not None}</li>
                    <li><strong>Differentiation order:</strong> {self.differentiation}</li>
                    <li><strong>Creation date:</strong> {self.creation_date}</li>
                    <li><strong>Last fit date:</strong> {self.fit_date}</li>
                    <li><strong>spotforecast version:</strong> {self.spotforecast_version}</li>
                    <li><strong>Python version:</strong> {self.python_version}</li>
                    <li><strong>Forecaster id:</strong> {self.forecaster_id}</li>
                </ul>
            </details>
            <details>
                <summary>Exogenous Variables</summary>
                <ul>
                    {exog_names_in_}
                </ul>
            </details>
            <details>
                <summary>Data Transformations</summary>
                <ul>
                    <li><strong>Transformer for y:</strong> {self.transformer_y}</li>
                    <li><strong>Transformer for exog:</strong> {self.transformer_exog}</li>
                </ul>
            </details>
            <details>
                <summary>Training Information</summary>
                <ul>
                    <li><strong>Training range:</strong> {self.training_range_.to_list() if self.is_fitted else 'Not fitted'}</li>
                    <li><strong>Training index type:</strong> {str(self.index_type_).split('.')[-1][:-2] if self.is_fitted else 'Not fitted'}</li>
                    <li><strong>Training index frequency:</strong> {self.index_freq_ if self.is_fitted else 'Not fitted'}</li>
                </ul>
            </details>
            <details>
                <summary>Estimator Parameters</summary>
                <ul>
                    {params}
                </ul>
            </details>
            <details>
                <summary>Fit Kwargs</summary>
                <ul>
                    {self.fit_kwargs}
                </ul>
            </details>
        </div>
        """

        return style + content

    def __setstate__(self, state: dict) -> None:
        """
        Custom __setstate__ to ensure backward compatibility when unpickling.

        Forecasters pickled with older versions of the library predate the
        `__spotforecast_tags__` attribute. After restoring the state via the
        parent implementation, the default tag dictionary is added only when
        the attribute is missing, preserving any custom tags stored in newer
        pickles.

        Args:
            state: Attribute dictionary produced by `__getstate__` during
                pickling.
        """
        super().__setstate__(state)
        # Only backfill the tags for legacy pickles; never overwrite
        # tags that were serialized with the object.
        if not hasattr(self, "__spotforecast_tags__"):
            # Must mirror the defaults assigned in __init__.
            self.__spotforecast_tags__ = {
                "library": "spotforecast",
                "forecaster_name": "ForecasterRecursive",
                "forecaster_task": "regression",
                "forecasting_scope": "single-series",
                "forecasting_strategy": "recursive",
                "index_types_supported": ["pandas.RangeIndex", "pandas.DatetimeIndex"],
                "requires_index_frequency": True,
                "allowed_input_types_series": ["pandas.Series"],
                "supports_exog": True,
                "allowed_input_types_exog": ["pandas.Series", "pandas.DataFrame"],
                "handles_missing_values_series": False,
                "handles_missing_values_exog": True,
                "supports_lags": True,
                "supports_window_features": True,
                "supports_transformer_series": True,
                "supports_transformer_exog": True,
                "supports_weight_func": True,
                "supports_differentiation": True,
                "prediction_types": [
                    "point",
                    "interval",
                    "bootstrapping",
                    "quantiles",
                    "distribution",
                ],
                "supports_probabilistic": True,
                "probabilistic_methods": ["bootstrapping", "conformal"],
                "handles_binned_residuals": True,
            }

    def _create_lags(
        self,
        y: np.ndarray,
        X_as_pandas: bool = False,
        train_index: Optional[pd.Index] = None,
    ) -> Tuple[Optional[Union[np.ndarray, pd.DataFrame]], np.ndarray]:
        """
        Build the lagged predictor matrix and the aligned target vector.

        Each output row i holds, for every configured lag k, the value
        ``y[i + window_size - k]``; the matching target is
        ``y[i + window_size]``.

        Args:
            y: One-dimensional array of target values.
            X_as_pandas: When True, return the lag matrix as a pandas
                DataFrame with columns `self.lags_names` and index
                `train_index`.
            train_index: Index for the DataFrame output. Required when
                `X_as_pandas` is True.

        Returns:
            Tuple containing:
                - Lag matrix of shape (n_rows, n_lags), or None when the
                  forecaster was configured without lags.
                - Target vector of shape (n_rows,), aligned row-for-row
                  with the lag matrix.

        Raises:
            ValueError: If `X_as_pandas` is True and `train_index` is None.
            ValueError: If `y` is too short for the configured window size.
        """
        if X_as_pandas and train_index is None:
            raise ValueError(
                "If `X_as_pandas` is True, `train_index` must be provided."
            )

        if len(y) <= self.window_size:
            raise ValueError(
                f"Length of `y` must be greater than the maximum window size "
                f"needed by the forecaster.\n"
                f"    Length `y`: {len(y)}.\n"
                f"    Max window size: {self.window_size}."
            )

        lag_matrix = None
        if self.lags is not None:
            # Each row of `windows` is a full window of length `window_size`;
            # drop the final window because it has no target value after it.
            windows = np.lib.stride_tricks.sliding_window_view(
                y, self.window_size
            )[:-1]
            # Within every window, position `window_size - k` is the value
            # lagged by k steps relative to the target.
            lag_matrix = windows[:, self.window_size - self.lags]

            if X_as_pandas:
                lag_matrix = pd.DataFrame(
                    data=lag_matrix, columns=self.lags_names, index=train_index
                )

        target = y[self.window_size :]

        return lag_matrix, target

    def _create_window_features(
        self,
        y: pd.Series,
        train_index: pd.Index,
        X_as_pandas: bool = False,
    ) -> Tuple[List[Union[np.ndarray, pd.DataFrame]], List[str]]:
        """
        Compute window features from the target series, one matrix per
        configured window-feature transformer.

        Each transformer's `transform_batch(y)` must return a pandas
        DataFrame; its last `len(train_index)` rows are kept and validated
        to align exactly with `train_index`.

        Args:
            y: Target series the window features are computed from.
            train_index: Index of the training rows; used to trim and
                validate each transformer's output.
            X_as_pandas: When True, each feature matrix stays a DataFrame;
                otherwise it is converted to a NumPy array.

        Returns:
            Tuple containing:
                - List of feature matrices, one per transformer.
                - Flat list of the generated feature (column) names.

        Raises:
            TypeError: If a transformer's `transform_batch` does not return
                a pandas DataFrame.
            ValueError: If a transformer's output has the wrong number of
                rows or an index that differs from `train_index`.
        """

        n_rows = len(train_index)
        feature_matrices: List[Union[np.ndarray, pd.DataFrame]] = []
        feature_names: List[str] = []

        for transformer in self.window_features:
            transformed = transformer.transform_batch(y)
            if not isinstance(transformed, pd.DataFrame):
                raise TypeError(
                    f"The method `transform_batch` of {type(transformer).__name__} "
                    f"must return a pandas DataFrame."
                )
            # Keep only the rows corresponding to the training index.
            transformed = transformed.iloc[-n_rows:]
            if len(transformed) != n_rows:
                raise ValueError(
                    f"The method `transform_batch` of {type(transformer).__name__} "
                    f"must return a DataFrame with the same number of rows as "
                    f"the input time series - `window_size`: {n_rows}."
                )
            if not (transformed.index == train_index).all():
                raise ValueError(
                    f"The method `transform_batch` of {type(transformer).__name__} "
                    f"must return a DataFrame with the same index as "
                    f"the input time series - `window_size`."
                )

            feature_names.extend(transformed.columns)
            feature_matrices.append(
                transformed if X_as_pandas else transformed.to_numpy()
            )

        return feature_matrices, feature_names

    def _create_train_X_y(
        self, y: pd.Series, exog: Union[pd.Series, pd.DataFrame, None] = None
    ) -> Tuple[
        pd.DataFrame,
        pd.Series,
        List[str],
        List[str],
        List[str],
        List[str],
        Dict[str, type],
        Dict[str, type],
    ]:
        """Create training predictors and target values.

        Args:
            y: Target series for training. Must be a pandas Series.
            exog:
                Optional exogenous variables for training. Can be a pandas Series or DataFrame.
                Must either have the same index as `y` and cover the same time range,
                or already be aligned with the training index (length of `y` minus
                `window_size`).

        Returns:
            Tuple containing, in this order:
                - X_train: DataFrame of training predictors including lags, window features, and exogenous variables (if provided).
                - y_train: Series of target values aligned with the predictors.
                - exog_names_in_: List of exogenous variable names as received, or None if no exogenous variables are used.
                - X_train_window_features_names_out_: List of window feature names, or None if no window features are used.
                - X_train_exog_names_out_: List of exogenous column names after transformation, or None if no exogenous variables are used.
                - X_train_features_names_out_: List of all predictor feature names (lags + window features + exog).
                - exog_dtypes_in_: Dictionary of input data types for exogenous variables, or None.
                - exog_dtypes_out_: Dictionary of output data types for exogenous variables after transformation, or None.

        Raises:
            ValueError: If the length of `y` is not sufficient to create the specified lags and window features.
            ValueError: If `exog` is provided but does not have a compatible length/index with `y`.
            ValueError: If `exog` is provided but contains data types that are not supported after transformation.

        Examples:
            >>> import numpy as np
            >>> import pandas as pd
            >>> from sklearn.linear_model import LinearRegression
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> from spotforecast2_safe.preprocessing import RollingFeatures
            >>> y = pd.Series(np.arange(30), name='y')
            >>> exog = pd.DataFrame({'temp': np.random.randn(30)}, index=y.index)
            >>> forecaster = ForecasterRecursive(
            ...     estimator=LinearRegression(),
            ...     lags=3,
            ...     window_features=[RollingFeatures(stats='mean', window_sizes=3)]
            ... )
            >>> (X_train, y_train, exog_names_in_, window_features_names,
            ...  exog_names_out, feature_names, exog_dtypes_in_,
            ...  exog_dtypes_out_) = forecaster._create_train_X_y(y=y, exog=exog)
            >>> isinstance(X_train, pd.DataFrame)
            True
            >>> isinstance(y_train, pd.Series)
            True
            >>> feature_names == forecaster.lags_names + window_features_names + exog_names_out
            True
        """
        check_y(y=y)
        y = input_to_frame(data=y, input_name="y")

        # At least window_size + 1 observations are needed so that one
        # complete training row can be built.
        if len(y) <= self.window_size:
            raise ValueError(
                f"Length of `y` must be greater than the maximum window size "
                f"needed by the forecaster.\n"
                f"    Length `y`: {len(y)}.\n"
                f"    Max window size: {self.window_size}.\n"
                f"    Lags window size: {self.max_lag}.\n"
                f"    Window features window size: {self.max_size_window_features}."
            )

        # Transformers are fitted only on the first (training) call; once the
        # forecaster is fitted, subsequent calls reuse the fitted transformer.
        fit_transformer = False if self.is_fitted else True
        y = transform_dataframe(
            df=y,
            transformer=self.transformer_y,
            fit=fit_transformer,
            inverse_transform=False,
        )
        y_values, y_index = check_extract_values_and_index(data=y, data_label="`y`")
        if y_values.ndim == 2 and y_values.shape[1] == 1:
            y_values = y_values.ravel()
        # The first `window_size` observations are consumed as history; the
        # training rows start right after them.
        train_index = y_index[self.window_size :]

        if self.differentiation is not None:
            if not self.is_fitted:
                y_values = self.differentiator.fit_transform(y_values)
            else:
                # Differentiate on a copy so the stored differentiator's
                # fitted state is not altered after training.
                differentiator = copy(self.differentiator)
                y_values = differentiator.fit_transform(y_values)

        exog_names_in_ = None
        exog_dtypes_in_ = None
        exog_dtypes_out_ = None
        X_as_pandas = False
        if exog is not None:
            check_exog(exog=exog, allow_nan=True)
            exog = input_to_frame(data=exog, input_name="exog")
            _, exog_index = check_extract_values_and_index(
                data=exog, data_label="`exog`", ignore_freq=True, return_values=False
            )

            len_y_original = len(y)
            len_train = len(train_index)
            len_exog = len(exog)

            # `exog` may cover the full `y` range or be pre-trimmed to the
            # training rows only; anything else is ambiguous.
            if not len_exog == len_y_original and not len_exog == len_train:
                raise ValueError(
                    f"Length mismatch for exogenous variables. Expected either:\n"
                    f"  - Full length matching `y`: {len_y_original} observations, OR\n"
                    f"  - Pre-aligned length: {len_train} observations (y length - window_size)\n"
                    f"Got: {len_exog} observations.\n"
                    f"Window size: {self.window_size}"
                )

            if len_exog == len_y_original:
                if not (exog_index == y_index).all():
                    raise ValueError(
                        "When `exog` has the same length as `y`, the index of "
                        "`exog` must be aligned with the index of `y` "
                        "to ensure the correct alignment of values."
                    )
                # Standard case: exog covers full y range, trim by window_size
                exog = exog.iloc[self.window_size :, :]
            else:
                if not (exog_index == train_index).all():
                    raise ValueError(
                        "When `exog` already starts after the first `window_size` "
                        "observations, its index must be aligned with the index "
                        "of `y` starting from `window_size`."
                    )

            exog_names_in_ = exog.columns.to_list()
            exog_dtypes_in_ = get_exog_dtypes(exog=exog)

            exog = transform_dataframe(
                df=exog,
                transformer=self.transformer_exog,
                fit=fit_transformer,
                inverse_transform=False,
            )

            check_exog_dtypes(exog, call_check_exog=True)
            exog_dtypes_out_ = get_exog_dtypes(exog=exog)
            # Assemble X_train as pandas (not numpy) when exog has any
            # non-numeric or boolean dtype, so those dtypes are preserved
            # instead of being upcast by numpy concatenation.
            X_as_pandas = any(
                not pd.api.types.is_numeric_dtype(dtype)
                or pd.api.types.is_bool_dtype(dtype)
                for dtype in set(exog.dtypes)
            )

        X_train = []
        X_train_features_names_out_ = []

        # Create lags
        # Note: y_values might have NaNs from differentiation.
        # TODO: check if _create_lags handles this!
        X_train_lags, y_train = self._create_lags(
            y=y_values, X_as_pandas=X_as_pandas, train_index=train_index
        )
        if X_train_lags is not None:
            X_train.append(X_train_lags)
            X_train_features_names_out_.extend(self.lags_names)

        X_train_window_features_names_out_ = None
        if self.window_features is not None:
            # Drop the first `differentiation` observations, which are not
            # valid after differencing, before computing window features.
            n_diff = 0 if self.differentiation is None else self.differentiation
            if isinstance(y_values, pd.Series):
                y_vals_for_wf = y_values.iloc[n_diff:]
                y_index_for_wf = y_index[n_diff:]
            else:
                y_vals_for_wf = y_values[n_diff:]
                y_index_for_wf = y_index[n_diff:]

            y_window_features = pd.Series(y_vals_for_wf, index=y_index_for_wf)
            X_train_window_features, X_train_window_features_names_out_ = (
                self._create_window_features(
                    y=y_window_features,
                    X_as_pandas=X_as_pandas,
                    train_index=train_index,
                )
            )
            X_train.extend(X_train_window_features)
            X_train_features_names_out_.extend(X_train_window_features_names_out_)

        X_train_exog_names_out_ = None
        if exog is not None:
            X_train_exog_names_out_ = exog.columns.to_list()
            if not X_as_pandas:
                exog = exog.to_numpy()
            X_train_features_names_out_.extend(X_train_exog_names_out_)
            X_train.append(exog)

        # Merge lag, window-feature and exog parts into a single matrix.
        if len(X_train) == 1:
            X_train = X_train[0]
        else:
            if X_as_pandas:
                X_train = pd.concat(X_train, axis=1)
            else:
                X_train = np.concatenate(X_train, axis=1)

        if X_as_pandas:
            X_train.index = train_index
        else:
            X_train = pd.DataFrame(
                data=X_train, index=train_index, columns=X_train_features_names_out_
            )

        y_train = pd.Series(data=y_train, index=train_index, name="y")

        return (
            X_train,
            y_train,
            exog_names_in_,
            X_train_window_features_names_out_,
            X_train_exog_names_out_,
            X_train_features_names_out_,
            exog_dtypes_in_,
            exog_dtypes_out_,
        )

    def create_train_X_y(
        self, y: pd.Series, exog: Union[pd.Series, pd.DataFrame, None] = None
    ) -> Tuple[pd.DataFrame, pd.Series]:
        """Create the training matrices used to fit the internal estimator.

        Thin public wrapper around the internal `_create_train_X_y` method:
        all validation, transformation and feature-engineering work is
        delegated there, and only the predictor matrix and the target vector
        are exposed to the caller.

        Args:
            y: Target series for training. Must be a pandas Series.
            exog: Optional exogenous variables for training. Can be a pandas
                Series or DataFrame. Must have the same index as `y` and cover
                the same time range. Defaults to None.

        Returns:
            Tuple containing:
                - X_train: DataFrame of training predictors including lags, window features, and exogenous variables (if provided).
                - y_train: Series of target values aligned with the predictors.

        Examples:
            >>> import numpy as np
            >>> import pandas as pd
            >>> from sklearn.linear_model import LinearRegression
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> from spotforecast2_safe.preprocessing import RollingFeatures
            >>> y = pd.Series(np.arange(30), name='y')
            >>> exog = pd.DataFrame({'temp': np.random.randn(30)}, index=y.index)
            >>> forecaster = ForecasterRecursive(
            ...     estimator=LinearRegression(),
            ...     lags=3,
            ...     window_features=[RollingFeatures(stats='mean', window_sizes=3)]
            ... )
            >>> X_train, y_train = forecaster.create_train_X_y(y=y, exog=exog)
            >>> isinstance(X_train, pd.DataFrame)
            True
            >>> isinstance(y_train, pd.Series)
            True

        """
        # Only the first two elements of the internal tuple are public.
        X_train, y_train, *_ = self._create_train_X_y(y=y, exog=exog)
        return X_train, y_train

    def _train_test_split_one_step_ahead(
        self,
        y: pd.Series,
        initial_train_size: int,
        exog: Union[pd.Series, pd.DataFrame, None] = None,
    ) -> Tuple[pd.DataFrame, pd.Series, pd.DataFrame, pd.Series]:
        """
        Create matrices needed to train and test the forecaster for one-step-ahead
        predictions.

        Args:
            y: Training time series.
            initial_train_size: Initial size of the training set. It is the number of
                observations used to train the forecaster before making the first
                prediction.
            exog: Exogenous variable/s included as predictor/s. Must have the same
                number of observations as y and their indexes must be aligned.
                Defaults to None.

        Returns:
            Tuple containing:
                - X_train: Predictor values used to train the model as pandas DataFrame.
                - y_train: Target values related to each row of X_train as pandas Series.
                - X_test: Predictor values used to test the model as pandas DataFrame.
                - y_test: Target values related to each row of X_test as pandas Series.

        Examples:
            >>> import numpy as np
            >>> import pandas as pd
            >>> from sklearn.linear_model import LinearRegression
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> from spotforecast2_safe.preprocessing import RollingFeatures
            >>> y = pd.Series(np.arange(30), name='y')
            >>> exog = pd.DataFrame({'temp': np.random.randn(30)}, index=y.index)
            >>> forecaster = ForecasterRecursive(
            ...     estimator=LinearRegression(),
            ...     lags=3,
            ...     window_features=[RollingFeatures(stats='mean', window_sizes=3)]
            ... )
            >>> X_train, y_train, X_test, y_test = forecaster._train_test_split_one_step_ahead(y=y, initial_train_size=20, exog=exog)
            >>> isinstance(X_train, pd.DataFrame)
            True
            >>> isinstance(y_train, pd.Series)
            True
            >>> isinstance(X_test, pd.DataFrame)
            True
            >>> isinstance(y_test, pd.Series)
            True
        """

        is_fitted = self.is_fitted
        self.is_fitted = False
        X_train, y_train, *_ = self._create_train_X_y(
            y=y.iloc[:initial_train_size],
            exog=exog.iloc[:initial_train_size] if exog is not None else None,
        )

        test_init = initial_train_size - self.window_size
        self.is_fitted = True
        X_test, y_test, *_ = self._create_train_X_y(
            y=y.iloc[test_init:],
            exog=exog.iloc[test_init:] if exog is not None else None,
        )

        self.is_fitted = is_fitted

        return X_train, y_train, X_test, y_test

    def get_params(self, deep: bool = True) -> Dict[str, object]:
        """
        Return the configuration parameters of this forecaster.

        Args:
            deep: When True, also include the parameters of the contained
                estimator (if it exposes `get_params`), with each name prefixed
                by ``estimator__``.

        Returns:
            params: Dictionary of parameter names mapped to their values.

        Examples:
            >>> from sklearn.linear_model import LinearRegression
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
            >>> forecaster.get_params()  # doctest: +ELLIPSIS
            {
                'estimator': LinearRegression(), 'lags': 3, 'window_features': None,
                'transformer_y': None, 'transformer_exog': None, 'weight_func': None,
                'differentiation': None, 'fit_kwargs': {}, 'binner_kwargs': None, 'forecaster_id': '...'}
        """
        param_keys = (
            "estimator",
            "lags",
            "window_features",
            "transformer_y",
            "transformer_exog",
            "weight_func",
            "differentiation",
            "fit_kwargs",
            "binner_kwargs",
            "forecaster_id",
        )
        # Only attributes that actually exist on this instance are reported.
        params = {
            name: getattr(self, name) for name in param_keys if hasattr(self, name)
        }

        if deep:
            estimator = getattr(self, "estimator", None)
            if estimator is not None and hasattr(estimator, "get_params"):
                params.update(
                    {
                        f"estimator__{name}": value
                        for name, value in estimator.get_params(deep=True).items()
                    }
                )

        return params

    def set_params(
        self, params: Dict[str, object] = None, **kwargs: object
    ) -> "ForecasterRecursive":
        """
        Set the parameters of this forecaster.

        Parameters may target the forecaster itself or a contained sub-object
        such as the estimator, using the ``<object>__<param>`` prefix
        convention (e.g. ``estimator__fit_intercept``).

        Args:
            params: Optional dictionary of parameter names mapped to their new
                values. If provided, these parameters are applied first and are
                overridden by identical keys in `**kwargs`.
            **kwargs: Dictionary of parameter names mapped to their new values.
                Parameters can be for the forecaster itself or for the contained
                estimator (using the `estimator__` prefix).

        Returns:
            self: The forecaster instance with updated parameters.

        Examples:
            >>> from sklearn.linear_model import LinearRegression
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
            >>> _ = forecaster.set_params(estimator__fit_intercept=False)
            >>> forecaster.estimator.get_params()["fit_intercept"]
            False
        """

        # Merge the optional `params` dict with keyword arguments; kwargs win
        # on duplicate keys because they are applied last.
        all_params = dict(params) if params is not None else {}
        all_params.update(kwargs)

        if not all_params:
            return self

        # Split keys into direct attributes and nested `<object>__<param>`
        # updates. NOTE: unknown top-level names are set as plain attributes
        # without validation (intentionally relaxed behavior).
        nested_params: Dict[str, Dict[str, object]] = {}
        for key, value in all_params.items():
            if "__" in key:
                obj_name, param_name = key.split("__", 1)
                nested_params.setdefault(obj_name, {})[param_name] = value
            else:
                setattr(self, key, value)

        # Forward nested updates to each sub-object, preferring its own
        # `set_params` API when available; otherwise set attributes directly.
        for obj_name, obj_params in nested_params.items():
            if hasattr(self, obj_name):
                obj = getattr(self, obj_name)
                if hasattr(obj, "set_params"):
                    obj.set_params(**obj_params)
                else:
                    for param_name, value in obj_params.items():
                        setattr(obj, param_name, value)

        return self

    def fit(
        self,
        y: pd.Series,
        exog: Union[pd.Series, pd.DataFrame, None] = None,
        store_last_window: bool = True,
        store_in_sample_residuals: bool = False,
        random_state: int = 123,
        suppress_warnings: bool = False,
    ) -> None:
        """
        Fit the forecaster to the training data.

        Args:
            y:
                Target series for training. Must be a pandas Series.
            exog:
                Optional exogenous variables for training. Can be a pandas Series or DataFrame. Must have the same index as `y` and cover the same time range. Defaults to None.
            store_last_window:
                Whether to store the last `window_size` observations of the training series for use in prediction. Defaults to True.
            store_in_sample_residuals:
                Whether to store in-sample residuals after fitting, which can be used for certain probabilistic prediction methods. Defaults to False.
            random_state:
                Random seed for reproducibility when sampling residuals if `store_in_sample_residuals` is True. Defaults to 123.
            suppress_warnings:
                Whether to suppress warnings during fitting, such as those related to insufficient data length for lags or window features. Defaults to False.

        Returns:
            None

        Examples:
            >>> import numpy as np
            >>> import pandas as pd
            >>> from sklearn.linear_model import LinearRegression
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> from spotforecast2_safe.preprocessing import RollingFeatures
            >>> y = pd.Series(np.arange(30), name='y')
            >>> exog = pd.DataFrame({'temp': np.random.randn(30)}, index=y.index)
            >>> forecaster = ForecasterRecursive(
            ...     estimator=LinearRegression(),
            ...     lags=3,
            ...     window_features=[RollingFeatures(stats='mean', window_sizes=3)]
            ... )
            >>> forecaster.fit(y=y, exog=exog, store_in_sample_residuals=True)
        """

        set_skforecast_warnings(suppress_warnings, action="ignore")

        # Reset values in case the forecaster has already been fitted.
        self.last_window_ = None
        self.index_type_ = None
        self.index_freq_ = None
        self.training_range_ = None
        self.series_name_in_ = None
        self.exog_in_ = False
        self.exog_names_in_ = None
        self.exog_type_in_ = None
        self.exog_dtypes_in_ = None
        self.exog_dtypes_out_ = None
        self.X_train_window_features_names_out_ = None
        self.X_train_exog_names_out_ = None
        self.X_train_features_names_out_ = None
        self.in_sample_residuals_ = None
        self.in_sample_residuals_by_bin_ = None
        self.binner_intervals_ = None
        self.is_fitted = False
        self.fit_date = None

        # Build the training matrices; `is_fitted` is False here so the
        # transformers (y/exog) are fitted inside `_create_train_X_y`.
        (
            X_train,
            y_train,
            exog_names_in_,
            X_train_window_features_names_out_,
            X_train_exog_names_out_,
            X_train_features_names_out_,
            exog_dtypes_in_,
            exog_dtypes_out_,
        ) = self._create_train_X_y(y=y, exog=exog)

        # Optional per-observation weights from the user-supplied weight_func.
        sample_weight = self.create_sample_weights(X_train=X_train)

        if sample_weight is not None:
            self.estimator.fit(
                X=X_train,
                y=y_train,
                sample_weight=sample_weight,
                **self.fit_kwargs,
            )
        else:
            self.estimator.fit(X=X_train, y=y_train, **self.fit_kwargs)

        self.X_train_window_features_names_out_ = X_train_window_features_names_out_
        self.X_train_features_names_out_ = X_train_features_names_out_

        # Record training metadata used later to validate prediction inputs.
        self.is_fitted = True
        self.fit_date = pd.Timestamp.today().strftime("%Y-%m-%d %H:%M:%S")
        self.training_range_ = y.index[[0, -1]]
        self.index_type_ = type(y.index)
        if isinstance(y.index, pd.DatetimeIndex):
            self.index_freq_ = y.index.freqstr
        else:
            # Non-datetime indexes (e.g. RangeIndex) expose `step` instead of
            # a frequency string; fall back to None when neither exists.
            try:
                self.index_freq_ = y.index.step
            except AttributeError:
                self.index_freq_ = None

        if exog is not None:
            self.exog_in_ = True
            self.exog_type_in_ = type(exog)
            self.exog_names_in_ = exog_names_in_
            self.exog_dtypes_in_ = exog_dtypes_in_
            self.exog_dtypes_out_ = exog_dtypes_out_
            self.X_train_exog_names_out_ = X_train_exog_names_out_

        self.series_name_in_ = y.name if y.name is not None else "y"

        # NOTE: This is done to save time during fit in functions such as backtesting()
        if self._probabilistic_mode is not False:
            self._binning_in_sample_residuals(
                y_true=y_train.to_numpy(),
                y_pred=self.estimator.predict(X_train).ravel(),
                store_in_sample_residuals=store_in_sample_residuals,
                random_state=random_state,
            )

        # Keep the last `window_size` observations so predictions can start
        # right after the training data without the caller passing history.
        if store_last_window:
            self.last_window_ = (
                y.iloc[-self.window_size :]
                .copy()
                .to_frame(name=y.name if y.name is not None else "y")
            )

        set_skforecast_warnings(suppress_warnings, action="default")

    def create_sample_weights(self, X_train: pd.DataFrame) -> np.ndarray:
        """
        Build per-observation weights from the forecaster's `weight_func`
        attribute.

        Args:
            X_train: Dataframe created with the `create_train_X_y` method,
                first return. Its index is passed to `weight_func`.

        Returns:
            Weights to use in `fit` method, or None when no `weight_func`
            is configured (or when it returns None).

        Raises:
            ValueError: If the computed weights contain NaN values, contain
                negative values, or sum to zero.
        """

        if self.weight_func is None:
            return None

        sample_weight = self.weight_func(X_train.index)
        if sample_weight is None:
            return None

        # Validate: weights must be finite, non-negative and not all zero.
        if np.isnan(sample_weight).any():
            raise ValueError(
                "The resulting `sample_weight` cannot have NaN values."
            )
        if np.any(sample_weight < 0):
            raise ValueError(
                "The resulting `sample_weight` cannot have negative values."
            )
        if np.sum(sample_weight) == 0:
            raise ValueError(
                "The resulting `sample_weight` cannot be normalized because "
                "the sum of the weights is zero."
            )

        return sample_weight

    def _create_predict_inputs(
        self,
        steps: int | str | pd.Timestamp,
        last_window: Union[pd.Series, pd.DataFrame, None] = None,
        exog: Union[pd.Series, pd.DataFrame, None] = None,
        predict_probabilistic: bool = False,
        use_in_sample_residuals: bool = True,
        use_binned_residuals: bool = True,
        check_inputs: bool = True,
    ) -> Tuple[np.ndarray, Union[np.ndarray, None], pd.Index, int]:
        """
        Create the inputs needed for the first iteration of the prediction
        process. As this is a recursive process, the last window is updated at
        each iteration of the prediction process.

        Args:
            steps: Number of steps to predict.
                - If steps is int, number of steps to predict.
                - If str or pandas Datetime, the prediction will be up to that date.
            last_window: Series values used to create the predictors (lags) needed in the
                first iteration of the prediction (t + 1).
                If `last_window = None`, the values stored in `self.last_window_` are
                used to calculate the initial predictors, and the predictions start
                right after training data.
            exog: Exogenous variable/s included as predictor/s.
            predict_probabilistic: If `True`, the necessary checks for probabilistic predictions will be
                performed.
            use_in_sample_residuals: If `True`, residuals from the training data are used as proxy of
                prediction error to create predictions.
                If `False`, out of sample residuals (calibration) are used.
                Out-of-sample residuals must be precomputed using Forecaster's
                `set_out_sample_residuals()` method.
            use_binned_residuals: If `True`, residuals are selected based on the predicted values
                (binned selection).
                If `False`, residuals are selected randomly.
            check_inputs: If `True`, the input is checked for possible warnings and errors
                with the `check_predict_input` function. This argument is created
                for internal use and is not recommended to be changed.

        Returns:
            - last_window_values:
                Numpy array of the last window values to use for prediction,
                transformed and ready for input into the prediction method.
            - exog_values:
                Numpy array of exogenous variable values for prediction,
                transformed and ready for input into the prediction method,
                or None if no exogenous variables are used.
            - prediction_index:
                Pandas Index for the predicted values, constructed based on the
                last window index and the number of steps to predict.
            - steps:
                Number of future steps predicted.
        """

        # Default to the window stored at fit time so predictions start right
        # after the training data.
        if last_window is None:
            last_window = self.last_window_

        # Resolve a date-like `steps` into an integer number of steps relative
        # to the last window's index (only meaningful once fitted).
        if self.is_fitted:
            steps = date_to_index_position(
                index=last_window.index,
                date_input=steps,
                method="prediction",
                date_literal="steps",
            )

        if check_inputs:
            check_predict_input(
                forecaster_name=type(self).__name__,
                steps=steps,
                is_fitted=self.is_fitted,
                exog_in_=self.exog_in_,
                index_type_=self.index_type_,
                index_freq_=self.index_freq_,
                window_size=self.window_size,
                last_window=last_window,
                last_window_exog=None,
                exog=exog,
                exog_names_in_=self.exog_names_in_,
                interval=None,
            )

            if predict_probabilistic:
                check_residuals_input(
                    forecaster_name=type(self).__name__,
                    use_in_sample_residuals=use_in_sample_residuals,
                    in_sample_residuals_=self.in_sample_residuals_,
                    out_sample_residuals_=self.out_sample_residuals_,
                    use_binned_residuals=use_binned_residuals,
                    in_sample_residuals_by_bin_=self.in_sample_residuals_by_bin_,
                    out_sample_residuals_by_bin_=self.out_sample_residuals_by_bin_,
                )

        # Apply the same y-transformation used in training (fit=False: reuse
        # the already-fitted transformer).
        last_window_values = (
            last_window.iloc[-self.window_size :].to_numpy(copy=True).ravel()
        )
        last_window_values = transform_numpy(
            array=last_window_values,
            transformer=self.transformer_y,
            fit=False,
            inverse_transform=False,
        )
        if self.differentiation is not None:
            # NOTE(review): `fit_transform` is called at predict time, which
            # mutates the stored differentiator's state — presumably intended
            # to prime it with the last window; confirm against the
            # differentiator's contract.
            last_window_values = self.differentiator.fit_transform(last_window_values)

        if exog is not None:
            exog = input_to_frame(data=exog, input_name="exog")
            # Reorder columns to match the order seen at training time.
            if exog.columns.tolist() != self.exog_names_in_:
                exog = exog[self.exog_names_in_]

            exog = transform_dataframe(
                df=exog,
                transformer=self.transformer_exog,
                fit=False,
                inverse_transform=False,
            )

            # If dtypes differ from those produced during training, run the
            # full dtype validation; otherwise only check for NaN values.
            if not exog.dtypes.to_dict() == self.exog_dtypes_out_:
                check_exog_dtypes(exog=exog)
            else:
                check_exog(exog=exog, allow_nan=False)

            # Keep only the first `steps` rows of exog values.
            exog_values = exog.to_numpy()[:steps]
        else:
            exog_values = None

        prediction_index = expand_index(index=last_window.index, steps=steps)

        # The returned arrays stay in the transformed/differentiated scale;
        # warn so callers know downstream predictions will be too.
        if self.transformer_y is not None or self.differentiation is not None:
            warnings.warn(
                "The output matrix is in the transformed scale due to the "
                "inclusion of transformations or differentiation in the Forecaster. "
                "As a result, any predictions generated using this matrix will also "
                "be in the transformed scale. Please refer to the documentation "
                "for more details: "
                "https://skforecast.org/latest/user_guides/training-and-prediction-matrices.html",
                DataTransformationWarning,
            )

        return last_window_values, exog_values, prediction_index, steps

    def _recursive_predict(
        self,
        steps: int,
        last_window_values: np.ndarray,
        exog_values: np.ndarray | None = None,
    ) -> np.ndarray:
        """
        Predict `steps` ahead recursively: each prediction is written back into
        the window buffer and used as a lag predictor for the following step.

        Args:
            steps:
                Number of steps to predict.
            last_window_values:
                Series values used to create the predictors needed in the first
                iteration of the prediction (t + 1).
            exog_values:
                Exogenous variable/s included as predictor/s. When not None,
                row `i` is used at prediction step `i` (assumes shape
                (steps, n_exog) — consistent with how `_create_predict_inputs`
                truncates exog to `steps` rows).

        Returns:
            Predicted values, shape (steps,).
        """

        # Force CPU inference; the estimator's original device is restored at the end.
        original_device = set_cpu_gpu_device(estimator=self.estimator, device="cpu")

        n_lags = len(self.lags) if self.lags is not None else 0
        n_window_features = (
            len(self.X_train_window_features_names_out_)
            if self.window_features is not None
            else 0
        )
        n_exog = exog_values.shape[1] if exog_values is not None else 0

        # Single reusable feature vector, layout: [lags | window features | exog].
        X = np.full(
            shape=(n_lags + n_window_features + n_exog), fill_value=np.nan, dtype=float
        )
        predictions = np.full(shape=steps, fill_value=np.nan, dtype=float)
        # Buffer = observed window followed by `steps` NaN slots that are filled
        # in-place as predictions are produced.
        last_window = np.concatenate((last_window_values, predictions))

        # Fast paths: call the fitted model's internals directly to avoid the
        # per-step overhead of the sklearn-style `predict` wrapper.
        estimator_name = type(self.estimator).__name__
        is_linear = isinstance(self.estimator, LinearModel)
        is_lightgbm = estimator_name == "LGBMRegressor"
        is_xgboost = estimator_name == "XGBRegressor"

        if is_linear:
            coef = self.estimator.coef_
            intercept = self.estimator.intercept_
        elif is_lightgbm:
            booster = self.estimator.booster_
        elif is_xgboost:
            booster = self.estimator.get_booster()

        has_lags = self.lags is not None
        has_window_features = self.window_features is not None
        has_exog = exog_values is not None

        for i in range(steps):

            if has_lags:
                # Lag k for step i sits at buffer index -(k + steps - i): the
                # last (steps - i) slots are still unfilled future positions.
                X[:n_lags] = last_window[-self.lags - (steps - i)]
            if has_window_features:
                # Window ending just before the value being predicted at step i.
                window_data = last_window[i : -(steps - i)]
                X[n_lags : n_lags + n_window_features] = np.concatenate(
                    [wf.transform(window_data) for wf in self.window_features]
                )
            if has_exog:
                X[n_lags + n_window_features :] = exog_values[i]

            if is_linear:
                pred = np.dot(X, coef) + intercept
            elif is_lightgbm:
                pred = booster.predict(X.reshape(1, -1))
            elif is_xgboost:
                pred = booster.inplace_predict(X.reshape(1, -1))
            else:
                pred = self.estimator.predict(X.reshape(1, -1)).ravel()

            pred = pred.item()
            predictions[i] = pred

            # Update `last_window` values. The first position is discarded and
            # the new prediction is added at the end.
            last_window[-(steps - i)] = pred

        set_cpu_gpu_device(estimator=self.estimator, device=original_device)

        return predictions

    def _recursive_predict_bootstrapping(
        self,
        steps: int,
        last_window_values: np.ndarray,
        sampled_residuals: np.ndarray,
        use_binned_residuals: bool,
        n_boot: int,
        exog_values: np.ndarray | None = None,
    ) -> np.ndarray:
        """
        Vectorized bootstrap prediction - predict all n_boot iterations per step.
        Instead of running n_boot sequential predictions, this method predicts
        all bootstrap samples at once per step, significantly reducing overhead.

        Args:
            steps:
                Number of steps to predict.
            last_window_values:
                Series values used to create the predictors needed in the first
                iteration of the prediction (t + 1).
            sampled_residuals:
                Pre-sampled residuals for all bootstrap iterations.
                - If `use_binned_residuals=True`: 3D array of shape (n_bins, steps, n_boot)
                - If `use_binned_residuals=False`: 2D array of shape (steps, n_boot)
            use_binned_residuals:
                If `True`, residuals are selected based on the predicted values.
                If `False`, residuals are selected randomly.
            n_boot:
                Number of bootstrap iterations.
            exog_values:
                Exogenous variable/s included as predictor/s. Defaults to None.

        Returns:
            Numpy ndarray with the predicted values. Shape (steps, n_boot).

        Raises:
            ValueError:
                If `sampled_residuals` does not match the expected shape/dimensions.
            IndexError:
                If `last_window_values` or `exog_values` are not of expected lengths.

        Examples:
            >>> import numpy as np
            >>> from sklearn.linear_model import LinearRegression
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=2)
            >>> _ = forecaster.fit(y=pd.Series(np.arange(10)))
            >>> last_window = np.array([8, 9])
            >>> residuals = np.random.normal(size=(3, 5)) # 3 steps, 5 boots
            >>> preds = forecaster._recursive_predict_bootstrapping(
            ...     steps=3,
            ...     last_window_values=last_window,
            ...     sampled_residuals=residuals,
            ...     use_binned_residuals=False,
            ...     n_boot=5
            ... )
            >>> preds.shape
            (3, 5)
        """

        # Force CPU inference; the estimator's original device is restored at the end.
        original_device = set_cpu_gpu_device(estimator=self.estimator, device="cpu")

        n_lags = len(self.lags) if self.lags is not None else 0
        n_window_features = (
            len(self.X_train_window_features_names_out_)
            if self.window_features is not None
            else 0
        )
        n_exog = exog_values.shape[1] if exog_values is not None else 0
        n_features = n_lags + n_window_features + n_exog

        # Input matrix for prediction: shape (n_boot, n_features)
        X = np.full((n_boot, n_features), fill_value=np.nan, dtype=float)

        # Output predictions: shape (steps, n_boot)
        predictions = np.full((steps, n_boot), fill_value=np.nan, dtype=float)

        # Expand last_window to 2D: (window_size + steps, n_boot)
        # Each column represents a separate bootstrap trajectory
        last_window = np.tile(last_window_values[:, np.newaxis], (1, n_boot))
        last_window = np.vstack([last_window, np.full((steps, n_boot), np.nan)])

        # Fast paths: call the fitted model's internals directly to avoid the
        # per-step overhead of the sklearn-style `predict` wrapper.
        estimator_name = type(self.estimator).__name__
        is_linear = isinstance(self.estimator, LinearModel)
        is_lightgbm = estimator_name == "LGBMRegressor"
        is_xgboost = estimator_name == "XGBRegressor"

        if is_linear:
            coef = self.estimator.coef_
            intercept = self.estimator.intercept_
        elif is_lightgbm:
            booster = self.estimator.booster_
        elif is_xgboost:
            booster = self.estimator.get_booster()

        has_lags = self.lags is not None
        has_window_features = self.window_features is not None
        has_exog = exog_values is not None

        for i in range(steps):

            if has_lags:
                # Lag `lag` for step i sits at row -(lag + steps - i): the last
                # (steps - i) rows are still unfilled future positions.
                for j, lag in enumerate(self.lags):
                    X[:, j] = last_window[-(lag + steps - i), :]

            if has_window_features:
                # NOTE(review): unlike `_recursive_predict`, the window start is
                # not advanced by `i` here, so the slice grows with each step —
                # equivalent only if the rolling features use a fixed tail
                # of the window; confirm against the RollingFeatures contract.
                window_data = last_window[: -(steps - i), :]
                # transform accepts 2D: (window_length, n_boot) -> (n_boot, n_stats)
                # and concatenate along axis=1: (n_boot, total_window_features)
                X[:, n_lags : n_lags + n_window_features] = np.concatenate(
                    [wf.transform(window_data) for wf in self.window_features], axis=1
                )

            if has_exog:
                # Same exog row broadcast across all bootstrap trajectories.
                X[:, n_lags + n_window_features :] = exog_values[i]

            if is_linear:
                pred = np.dot(X, coef) + intercept
            elif is_lightgbm:
                pred = booster.predict(X)
            elif is_xgboost:
                pred = booster.inplace_predict(X)
            else:
                pred = self.estimator.predict(X).ravel()

            if use_binned_residuals:
                # sampled_residuals is a 3D array: (n_bins, steps, n_boot)
                # Each trajectory draws its residual from the bin that its own
                # predicted value falls into.
                boot_indices = np.arange(n_boot)
                pred_bins = self.binner.transform(pred).astype(int)
                pred += sampled_residuals[pred_bins, i, boot_indices]
            else:
                pred += sampled_residuals[i, :]

            predictions[i, :] = pred
            # Noisy predictions feed back into the window so residual noise
            # propagates through later steps of each trajectory.
            last_window[-(steps - i), :] = pred

        set_cpu_gpu_device(estimator=self.estimator, device=original_device)

        return predictions

    def create_predict_X(
        self,
        steps: int,
        last_window: pd.Series | pd.DataFrame | None = None,
        exog: pd.Series | pd.DataFrame | None = None,
        check_inputs: bool = True,
    ) -> pd.DataFrame:
        """
        Create the predictors needed to predict `steps` ahead. As it is a recursive
        process, the predictors are created at each iteration of the prediction
        process.

        Args:
            steps:
                Number of steps to predict.
                - If steps is int, number of steps to predict.
                - If str or pandas Datetime, the prediction will be up to that date.
            last_window:
                Series values used to create the predictors (lags) needed in the
                first iteration of the prediction (t + 1).
                If `last_window = None`, the values stored in `self.last_window_` are
                used to calculate the initial predictors, and the predictions start
                right after training data. Defaults to None.
            exog:
                Exogenous variable/s included as predictor/s. Defaults to None.
            check_inputs:
                If `True`, the input is checked for possible warnings and errors
                with the `check_predict_input` function. This argument is created
                for internal use and is not recommended to be changed.
                Defaults to True.

        Returns:
            Pandas DataFrame with the predictors for each step, one row per
            predicted step, columns named as in `self.X_train_features_names_out_`.
            The index is the same as the prediction index.
        """

        (
            last_window_values,
            exog_values,
            prediction_index,
            steps,
        ) = self._create_predict_inputs(
            steps=steps,
            last_window=last_window,
            exog=exog,
            check_inputs=check_inputs,
        )

        # Run the full recursive prediction first: the predicted values are
        # needed because later steps use earlier predictions as lag predictors.
        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore",
                message="X does not have valid feature names",
                category=UserWarning,
            )
            predictions = self._recursive_predict(
                steps=steps,
                last_window_values=last_window_values,
                exog_values=exog_values,
            )

        X_predict = []
        # Observed window followed by the predicted values, so lags that fall
        # inside the prediction horizon can be looked up by position.
        full_predictors = np.concatenate((last_window_values, predictions))

        if self.lags is not None:
            # Row r (step r) needs values at offsets -(steps - r) - lags from the
            # end; adding len(full_predictors) converts to non-negative indices.
            idx = np.arange(-steps, 0)[:, None] - self.lags
            X_lags = full_predictors[idx + len(full_predictors)]
            X_predict.append(X_lags)

        if self.window_features is not None:
            X_window_features = np.full(
                shape=(steps, len(self.X_train_window_features_names_out_)),
                fill_value=np.nan,
                order="C",
                dtype=float,
            )
            for i in range(steps):
                # Same sliding window used by `_recursive_predict` at step i.
                X_window_features[i, :] = np.concatenate(
                    [
                        wf.transform(full_predictors[i : -(steps - i)])
                        for wf in self.window_features
                    ]
                )
            X_predict.append(X_window_features)

        if exog is not None:
            X_predict.append(exog_values)

        X_predict = pd.DataFrame(
            data=np.concatenate(X_predict, axis=1),
            columns=self.X_train_features_names_out_,
            index=prediction_index,
        )

        if self.exog_in_:
            # Restore original exog dtypes when any is non-numeric. Note bool
            # counts as numeric in pandas, hence the explicit is_bool_dtype check.
            categorical_features = any(
                not pd.api.types.is_numeric_dtype(dtype)
                or pd.api.types.is_bool_dtype(dtype)
                for dtype in set(self.exog_dtypes_out_.values())
            )
            if categorical_features:
                X_predict = X_predict.astype(self.exog_dtypes_out_)

        if self.transformer_y is not None or self.differentiation is not None:
            warnings.warn(
                "The output matrix is in the transformed scale due to the "
                "inclusion of transformations or differentiation in the Forecaster. "
                "As a result, any predictions generated using this matrix will also "
                "be in the transformed scale. Please refer to the documentation "
                "for more details: "
                "https://skforecast.org/latest/user_guides/training-and-prediction-matrices.html",
                DataTransformationWarning,
            )

        return X_predict

    def predict(
        self,
        steps: int | str | pd.Timestamp,
        last_window: pd.Series | pd.DataFrame | None = None,
        exog: pd.Series | pd.DataFrame | None = None,
        check_inputs: bool = True,
    ) -> pd.Series:
        """
        Predict future values recursively for the specified number of steps.

        Args:
            steps:
                Number of future steps to predict. If a str or pandas Timestamp
                is given, predictions are generated up to that date.
            last_window:
                Optional last window of observed values to use for prediction. If None, uses the last window from training.
                Must be a pandas Series or DataFrame with the same structure as the training target series. Defaults to None.
            exog:
                Optional exogenous variables for prediction. Can be a pandas Series or DataFrame.
                Must have the same structure as the exogenous variables used in training. Defaults to None.
            check_inputs:
                Whether to perform input validation checks. Defaults to True.

        Returns:
            Pandas Series of predicted values for the specified number of steps,
            indexed according to the prediction index constructed from the last window and the number of steps.

        Examples:
            >>> import numpy as np
            >>> import pandas as pd
            >>> from sklearn.linear_model import LinearRegression
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> from spotforecast2_safe.preprocessing import RollingFeatures
            >>> y = pd.Series(np.arange(30), name='y')
            >>> exog = pd.DataFrame({'temp': np.random.randn(30)}, index=y.index)
            >>> forecaster = ForecasterRecursive(
            ...     estimator=LinearRegression(),
            ...     lags=3,
            ...     window_features=[RollingFeatures(stats='mean', window_sizes=3)]
            ... )
            >>> forecaster.fit(y=y, exog=exog)
            >>> last_window = y.iloc[-3:]
            >>> exog_future = pd.DataFrame({'temp': np.random.randn(5)}, index=pd.RangeIndex(start=30, stop=35))
            >>> predictions = forecaster.predict(
            ...     steps=5, last_window=last_window, exog=exog_future, check_inputs=True
            ... )
            >>> isinstance(predictions, pd.Series)
            True
        """

        last_window_values, exog_values, prediction_index, steps = (
            self._create_predict_inputs(
                steps=steps,
                last_window=last_window,
                exog=exog,
                check_inputs=check_inputs,
            )
        )

        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore",
                message="X does not have valid feature names",
                category=UserWarning,
            )
            predictions = self._recursive_predict(
                steps=steps,
                last_window_values=last_window_values,
                exog_values=exog_values,
            )

        # Undo differentiation first, then the target transformation: the
        # reverse of the order in which they were applied during training.
        if self.differentiation is not None:
            predictions = self.differentiator.inverse_transform_next_window(predictions)

        # transform_numpy is a no-op when transformer_y is None.
        predictions = transform_numpy(
            array=predictions,
            transformer=self.transformer_y,
            fit=False,
            inverse_transform=True,
        )

        predictions = pd.Series(data=predictions, index=prediction_index, name="pred")

        return predictions

    def predict_bootstrapping(
        self,
        steps: int | str | pd.Timestamp,
        last_window: pd.Series | pd.DataFrame | None = None,
        exog: pd.Series | pd.DataFrame | None = None,
        n_boot: int = 250,
        use_in_sample_residuals: bool = True,
        use_binned_residuals: bool = True,
        random_state: int = 123,
    ) -> pd.DataFrame:
        """
        Generate multiple forecasting predictions using a bootstrapping process.
        By sampling from a collection of past observed errors (the residuals),
        each iteration of bootstrapping generates a different set of predictions.
        See the References section for more information.

        Args:
            steps:
                Number of steps to predict.
                - If steps is int, number of steps to predict.
                - If str or pandas Datetime, the prediction will be up to that date.
            last_window:
                Series values used to create the predictors (lags) needed in the
                first iteration of the prediction (t + 1).
                If `last_window = None`, the values stored in `self.last_window_` are
                used to calculate the initial predictors, and the predictions start
                right after training data. Defaults to None.
            exog:
                Exogenous variable/s included as predictor/s. Defaults to None.
            n_boot:
                Number of bootstrapping iterations to perform when estimating prediction
                intervals. Defaults to 250.
            use_in_sample_residuals:
                If `True`, residuals from the training data are used as proxy of
                prediction error to create predictions.
                If `False`, out of sample residuals (calibration) are used.
                Out-of-sample residuals must be precomputed using Forecaster's
                `set_out_sample_residuals()` method. Defaults to True.
            use_binned_residuals:
                If `True`, residuals are selected based on the predicted values
                (binned selection).
                If `False`, residuals are selected randomly. Defaults to True.
            random_state:
                Seed for the random number generator to ensure reproducibility. Defaults to 123.

        Returns:
            Pandas DataFrame with predictions generated by bootstrapping. Shape: (steps, n_boot).

        Raises:
            ValueError:
                If `steps` is not an integer or a valid date.
            ValueError:
                If `exog` is missing or has invalid shape.
            ValueError:
                If `n_boot` is not a positive integer.
            ValueError:
                If `use_in_sample_residuals=True` and `in_sample_residuals_` are not available.
            ValueError:
                If `use_in_sample_residuals=False` and `out_sample_residuals_` are not available.

        Examples:
            >>> import numpy as np
            >>> import pandas as pd
            >>> from sklearn.linear_model import LinearRegression
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> rng = np.random.default_rng(123)
            >>> y = pd.Series(rng.normal(size=100), name='y')
            >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
            >>> _ = forecaster.fit(y=y)
            >>> boot_preds = forecaster.predict_bootstrapping(steps=3, n_boot=5)
            >>> boot_preds.shape
            (3, 5)

        References:
            .. [1] Forecasting: Principles and Practice (3rd ed) Rob J Hyndman and George Athanasopoulos.
                   https://otexts.com/fpp3/prediction-intervals.html
        """

        (
            last_window_values,
            exog_values,
            prediction_index,
            steps,
        ) = self._create_predict_inputs(
            steps=steps,
            last_window=last_window,
            exog=exog,
            predict_probabilistic=True,
            use_in_sample_residuals=use_in_sample_residuals,
            use_binned_residuals=use_binned_residuals,
            check_inputs=True,
        )

        if use_in_sample_residuals:
            residuals = self.in_sample_residuals_
            residuals_by_bin = self.in_sample_residuals_by_bin_
        else:
            residuals = self.out_sample_residuals_
            residuals_by_bin = self.out_sample_residuals_by_bin_

        # Pre-sample all residuals up front so the recursive loop only indexes.
        rng = np.random.default_rng(seed=random_state)
        if use_binned_residuals:
            # Create 3D array with sampled residuals: (n_bins, steps, n_boot)
            n_bins = len(residuals_by_bin)
            sampled_residuals = np.stack(
                [
                    residuals_by_bin[k][
                        rng.integers(
                            low=0, high=len(residuals_by_bin[k]), size=(steps, n_boot)
                        )
                    ]
                    for k in range(n_bins)
                ],
                axis=0,
            )
        else:
            sampled_residuals = residuals[
                rng.integers(low=0, high=len(residuals), size=(steps, n_boot))
            ]

        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore",
                message="X does not have valid feature names",
                category=UserWarning,
            )
            boot_predictions = self._recursive_predict_bootstrapping(
                steps=steps,
                last_window_values=last_window_values,
                exog_values=exog_values,
                sampled_residuals=sampled_residuals,
                use_binned_residuals=use_binned_residuals,
                n_boot=n_boot,
            )

        # Undo differentiation first, then the target transformation (reverse
        # of the training order).
        if self.differentiation is not None:
            boot_predictions = self.differentiator.inverse_transform_next_window(
                boot_predictions
            )

        # Fix: explicit `is not None` check instead of truthiness. Transformer
        # objects may define `__len__` (e.g. an sklearn Pipeline), making
        # truthiness unreliable; this also matches the `is not None` checks
        # used for `transformer_y` elsewhere in this class.
        if self.transformer_y is not None:
            boot_predictions = transform_numpy(
                array=boot_predictions,
                transformer=self.transformer_y,
                fit=False,
                inverse_transform=True,
            )

        boot_columns = [f"pred_boot_{i}" for i in range(n_boot)]
        boot_predictions = pd.DataFrame(
            data=boot_predictions, index=prediction_index, columns=boot_columns
        )

        return boot_predictions

    def predict_quantiles(
        self,
        steps: int | str | pd.Timestamp,
        last_window: pd.Series | pd.DataFrame | None = None,
        exog: pd.Series | pd.DataFrame | None = None,
        quantiles: list[float] | tuple[float, ...] = (0.05, 0.5, 0.95),
        n_boot: int = 250,
        use_in_sample_residuals: bool = True,
        use_binned_residuals: bool = True,
        random_state: int = 123,
    ) -> pd.DataFrame:
        """
        Calculate the specified quantiles for each step. After generating
        multiple forecasting predictions through a bootstrapping process, each
        quantile is calculated for each step.

        Args:
            steps: Number of steps to predict.
                - If steps is int, number of steps to predict.
                - If str or pandas Datetime, the prediction will be up to that date.
            last_window: Series values used to create the predictors (lags) needed in the
                first iteration of the prediction (t + 1).
                If `last_window = None`, the values stored in `self.last_window_` are
                used to calculate the initial predictors, and the predictions start
                right after training data.
            exog: Exogenous variable/s included as predictor/s.
            quantiles: Sequence of quantiles to compute, which must be between 0 and 1
                inclusive. For example, quantiles of 0.05, 0.5 and 0.95 should be as
                `quantiles = [0.05, 0.5, 0.95]`. Defaults to (0.05, 0.5, 0.95).
            n_boot: Number of bootstrapping iterations to perform when estimating quantiles.
            use_in_sample_residuals: If `True`, residuals from the training data are used as proxy of
                prediction error to create predictions.
                If `False`, out of sample residuals (calibration) are used.
                Out-of-sample residuals must be precomputed using Forecaster's
                `set_out_sample_residuals()` method.
            use_binned_residuals: If `True`, residuals are selected based on the predicted values
                (binned selection).
                If `False`, residuals are selected randomly.
            random_state: Seed for the random number generator to ensure reproducibility.

        Returns:
            Pandas DataFrame with one column per requested quantile
            (named `q_<quantile>`) and one row per predicted step.
        """

        # Fix: default changed from a mutable list to an immutable tuple to
        # avoid the shared-mutable-default pitfall; accepted values unchanged.
        check_interval(quantiles=quantiles)

        boot_predictions = self.predict_bootstrapping(
            steps=steps,
            last_window=last_window,
            exog=exog,
            n_boot=n_boot,
            random_state=random_state,
            use_in_sample_residuals=use_in_sample_residuals,
            use_binned_residuals=use_binned_residuals,
        )

        # Quantiles across bootstrap columns, one row per step after transpose.
        predictions = boot_predictions.quantile(q=quantiles, axis=1).transpose()
        predictions.columns = [f"q_{q}" for q in quantiles]

        return predictions

    def predict_dist(
        self,
        steps: int | str | pd.Timestamp,
        distribution: object,
        last_window: pd.Series | pd.DataFrame | None = None,
        exog: pd.Series | pd.DataFrame | None = None,
        n_boot: int = 250,
        use_in_sample_residuals: bool = True,
        use_binned_residuals: bool = True,
        random_state: int = 123,
    ) -> pd.DataFrame:
        """
        Fit a given probability distribution for each step. After generating
        multiple forecasting predictions through a bootstrapping process, each
        step is fitted to the given distribution.

        Args:
            steps: Number of steps to predict.
                - If steps is int, number of steps to predict.
                - If str or pandas Datetime, the prediction will be up to that date.
            distribution: A distribution object from scipy.stats with methods `_pdf` and `fit`.
                For example scipy.stats.norm.
            last_window: Series values used to create the predictors (lags) needed in the
                first iteration of the prediction (t + 1).
                If `last_window = None`, the values stored in `self.last_window_` are
                used to calculate the initial predictors, and the predictions start
                right after training data.
            exog: Exogenous variable/s included as predictor/s.
            n_boot: Number of bootstrapping iterations to perform when estimating prediction
                intervals.
            use_in_sample_residuals: If `True`, residuals from the training data are used as proxy of
                prediction error to create predictions.
                If `False`, out of sample residuals (calibration) are used.
                Out-of-sample residuals must be precomputed using Forecaster's
                `set_out_sample_residuals()` method.
            use_binned_residuals: If `True`, residuals are selected based on the predicted values
                (binned selection).
                If `False`, residuals are selected randomly.
            random_state: Seed for the random number generator to ensure reproducibility.

        Returns:
            Pandas DataFrame with the distribution parameters estimated for each
            step (shape parameters of `_pdf` plus `loc` and `scale`).

        Raises:
            TypeError: If `distribution` does not expose `_pdf` and a callable `fit`.
        """

        if not hasattr(distribution, "_pdf") or not callable(
            getattr(distribution, "fit", None)
        ):
            raise TypeError(
                "`distribution` must be a valid probability distribution object "
                "from scipy.stats, with methods `_pdf` and `fit`."
            )

        predictions = self.predict_bootstrapping(
            steps=steps,
            last_window=last_window,
            exog=exog,
            n_boot=n_boot,
            random_state=random_state,
            use_in_sample_residuals=use_in_sample_residuals,
            use_binned_residuals=use_binned_residuals,
        )

        # Shape parameters are the `_pdf` arguments other than `x`; `loc` and
        # `scale` are always returned by scipy's `fit`.
        param_names = [
            p for p in inspect.signature(distribution._pdf).parameters if p != "x"
        ] + ["loc", "scale"]

        # Fit the distribution to each step's bootstrap sample (row-wise).
        predictions[param_names] = predictions.apply(
            lambda x: distribution.fit(x), axis=1, result_type="expand"
        )
        predictions = predictions[param_names]

        return predictions

    def _predict_interval_conformal(
        self,
        steps: int | str | pd.Timestamp,
        last_window: pd.Series | pd.DataFrame | None = None,
        exog: pd.Series | pd.DataFrame | None = None,
        nominal_coverage: float = 0.95,
        use_in_sample_residuals: bool = True,
        use_binned_residuals: bool = True,
    ) -> pd.DataFrame:
        """
        Generate prediction intervals using the conformal prediction
        split method [1]_.

        Args:
            steps:
                Number of steps to predict.
                - If steps is int, number of steps to predict.
                - If str or pandas Datetime, the prediction will be up to that date.
            last_window:
                Series values used to create the predictors (lags) needed in the
                first iteration of the prediction (t + 1).
                If `last_window = None`, the values stored in` self.last_window_` are
                used to calculate the initial predictors, and the predictions start
                right after training data. Defaults to None.
            exog:
                Exogenous variable/s included as predictor/s. Defaults to None.
            nominal_coverage:
                Nominal coverage, also known as expected coverage, of the prediction
                intervals. Must be between 0 and 1. Defaults to 0.95.
            use_in_sample_residuals:
                If `True`, residuals from the training data are used as proxy of
                prediction error to create predictions.
                If `False`, out of sample residuals (calibration) are used.
                Out-of-sample residuals must be precomputed using Forecaster's
                `set_out_sample_residuals()` method. Defaults to True.
            use_binned_residuals:
                If `True`, residuals are selected based on the predicted values
                (binned selection).
                If `False`, residuals are selected randomly. Defaults to True.

        Returns:
            Pandas DataFrame with values predicted by the forecaster and their estimated interval.
            - pred: predictions.
            - lower_bound: lower bound of the interval.
            - upper_bound: upper bound of the interval.

        Raises:
            ValueError:
                If `nominal_coverage` is not between 0 and 1.
            ValueError:
                If inputs are invalid (checked by `_create_predict_inputs`).

        Examples:
            >>> # Internal method, typically used via predict_interval(method='conformal')
            >>> import numpy as np
            >>> import pandas as pd
            >>> from sklearn.linear_model import LinearRegression
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> rng = np.random.default_rng(123)
            >>> y = pd.Series(rng.normal(size=100), name='y')
            >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
            >>> _ = forecaster.fit(y=y)
            >>> preds = forecaster._predict_interval_conformal(steps=3, nominal_coverage=0.9)
            >>> preds.columns.tolist()
            ['pred', 'lower_bound', 'upper_bound']

        References:
            .. [1] MAPIE - Model Agnostic Prediction Interval Estimator.
                   https://mapie.readthedocs.io/en/stable/theoretical_description_regression.html#the-split-method
        """

        last_window_values, exog_values, prediction_index, steps = (
            self._create_predict_inputs(
                steps=steps,
                last_window=last_window,
                exog=exog,
                predict_probabilistic=True,
                use_in_sample_residuals=use_in_sample_residuals,
                use_binned_residuals=use_binned_residuals,
                check_inputs=True,
            )
        )

        if use_in_sample_residuals:
            residuals = self.in_sample_residuals_
            residuals_by_bin = self.in_sample_residuals_by_bin_
        else:
            residuals = self.out_sample_residuals_
            residuals_by_bin = self.out_sample_residuals_by_bin_

        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore",
                message="X does not have valid feature names",
                category=UserWarning,
            )
            predictions = self._recursive_predict(
                steps=steps,
                last_window_values=last_window_values,
                exog_values=exog_values,
            )

        if use_binned_residuals:
            # Fallback to global residuals if bin is empty
            if len(residuals) > 0:
                global_cf = np.quantile(np.abs(residuals), nominal_coverage)
            else:
                global_cf = np.nan

            correction_factor_by_bin = {}
            for k, v in residuals_by_bin.items():
                if len(v) > 0:
                    correction_factor_by_bin[k] = np.quantile(
                        np.abs(v), nominal_coverage
                    )
                else:
                    correction_factor_by_bin[k] = global_cf

            replace_func = np.vectorize(
                lambda x: correction_factor_by_bin.get(x, global_cf)
            )

            predictions_bin = self.binner.transform(predictions)
            correction_factor = replace_func(predictions_bin)
        else:
            correction_factor = np.quantile(np.abs(residuals), nominal_coverage)

        lower_bound = predictions - correction_factor
        upper_bound = predictions + correction_factor
        predictions = np.column_stack([predictions, lower_bound, upper_bound])

        if self.differentiation is not None:
            predictions = self.differentiator.inverse_transform_next_window(predictions)

        if self.transformer_y:
            predictions = transform_numpy(
                array=predictions,
                transformer=self.transformer_y,
                fit=False,
                inverse_transform=True,
            )

        predictions = pd.DataFrame(
            data=predictions,
            index=prediction_index,
            columns=["pred", "lower_bound", "upper_bound"],
        )

        return predictions

    def predict_interval(
        self,
        steps: int | str | pd.Timestamp,
        last_window: pd.Series | pd.DataFrame | None = None,
        exog: pd.Series | pd.DataFrame | None = None,
        method: str = "bootstrapping",
        interval: float | list[float] | tuple[float] = [5, 95],
        n_boot: int = 250,
        use_in_sample_residuals: bool = True,
        use_binned_residuals: bool = True,
        random_state: int = 123,
    ) -> pd.DataFrame:
        """
        Predict n steps ahead and estimate prediction intervals using either
        bootstrapping or conformal prediction methods. Refer to the References
        section for additional details on these methods.

        Args:
            steps:
                Number of steps to predict.
                - If steps is int, number of steps to predict.
                - If str or pandas Datetime, the prediction will be up to that date.
            last_window:
                Series values used to create the predictors (lags) needed in the
                first iteration of the prediction (t + 1).
                If `last_window = None`, the values stored in `self.last_window_` are
                used to calculate the initial predictors, and the predictions start
                right after training data. Defaults to None.
            exog:
                Exogenous variable/s included as predictor/s. Defaults to None.
            method:
                Technique used to estimate prediction intervals. Available options:
                - 'bootstrapping': Bootstrapping is used to generate prediction
                  intervals [1]_.
                - 'conformal': Employs the conformal prediction split method for
                  interval estimation [2]_.
                Defaults to 'bootstrapping'.
            interval:
                Confidence level of the prediction interval. Interpretation depends
                on the method used:
                - If `float`, represents the nominal (expected) coverage (between 0
                  and 1). For instance, `interval=0.95` corresponds to `[2.5, 97.5]`
                  percentiles.
                - If `list` or `tuple`, defines the exact percentiles to compute, which
                  must be between 0 and 100 inclusive. For example, interval
                  of 95% should be as `interval = [2.5, 97.5]`.
                - When using `method='conformal'`, the interval must be a float or
                  a list/tuple defining a symmetric interval.
                Defaults to [5, 95].
            n_boot:
                Number of bootstrapping iterations to perform when estimating prediction
                intervals. Defaults to 250.
            use_in_sample_residuals:
                If `True`, residuals from the training data are used as proxy of
                prediction error to create predictions.
                If `False`, out of sample residuals (calibration) are used.
                Out-of-sample residuals must be precomputed using Forecaster's
                `set_out_sample_residuals()` method. Defaults to True.
            use_binned_residuals:
                If `True`, residuals are selected based on the predicted values
                (binned selection).
                If `False`, residuals are selected randomly. Defaults to True.
            random_state:
                Seed for the random number generator to ensure reproducibility. Defaults to 123.

        Returns:
            Pandas DataFrame with values predicted by the forecaster and their estimated interval.
            - pred: predictions.
            - lower_bound: lower bound of the interval.
            - upper_bound: upper bound of the interval.

        Raises:
            ValueError:
                If `method` is not 'bootstrapping' or 'conformal'.
            ValueError:
                 If `interval` is invalid or not compatible with the chosen method.
            ValueError:
                If inputs (`steps`, `exog`, etc.) are invalid.

        Examples:
            >>> import numpy as np
            >>> import pandas as pd
            >>> from sklearn.linear_model import LinearRegression
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> rng = np.random.default_rng(123)
            >>> y = pd.Series(rng.normal(size=100), name='y')
            >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
            >>> _ = forecaster.fit(y=y)
            >>> # Bootstrapping method
            >>> intervals_boot = forecaster.predict_interval(
            ...     steps=3, method='bootstrapping', interval=[5, 95]
            ... )
            >>> intervals_boot.columns.tolist()
            ['pred', 'lower_bound', 'upper_bound']

            >>> # Conformal method
            >>> intervals_conf = forecaster.predict_interval(
            ...     steps=3, method='conformal', interval=0.95
            ... )
            >>> intervals_conf.columns.tolist()
            ['pred', 'lower_bound', 'upper_bound']

        References:
            .. [1] Forecasting: Principles and Practice (3rd ed) Rob J Hyndman and George Athanasopoulos.
                   https://otexts.com/fpp3/prediction-intervals.html
            .. [2] MAPIE - Model Agnostic Prediction Interval Estimator.
                   https://mapie.readthedocs.io/en/stable/theoretical_description_regression.html#the-split-method
        """

        if method == "bootstrapping":

            if isinstance(interval, (list, tuple)):
                check_interval(interval=interval, ensure_symmetric_intervals=False)
                interval = np.array(interval) / 100
            else:
                check_interval(alpha=interval, alpha_literal="interval")
                interval = np.array([0.5 - interval / 2, 0.5 + interval / 2])

            boot_predictions = self.predict_bootstrapping(
                steps=steps,
                last_window=last_window,
                exog=exog,
                n_boot=n_boot,
                random_state=random_state,
                use_in_sample_residuals=use_in_sample_residuals,
                use_binned_residuals=use_binned_residuals,
            )

            predictions = self.predict(
                steps=steps, last_window=last_window, exog=exog, check_inputs=False
            )

            predictions_interval = boot_predictions.quantile(
                q=interval, axis=1
            ).transpose()
            predictions_interval.columns = ["lower_bound", "upper_bound"]
            predictions = pd.concat((predictions, predictions_interval), axis=1)

        elif method == "conformal":

            if isinstance(interval, (list, tuple)):
                check_interval(interval=interval, ensure_symmetric_intervals=True)
                nominal_coverage = (interval[1] - interval[0]) / 100
            else:
                check_interval(alpha=interval, alpha_literal="interval")
                nominal_coverage = interval

            predictions = self._predict_interval_conformal(
                steps=steps,
                last_window=last_window,
                exog=exog,
                nominal_coverage=nominal_coverage,
                use_in_sample_residuals=use_in_sample_residuals,
                use_binned_residuals=use_binned_residuals,
            )
        else:
            raise ValueError(
                f"Invalid `method` '{method}'. Choose 'bootstrapping' or 'conformal'."
            )

        return predictions

    def _binning_in_sample_residuals(
        self,
        y_true: np.ndarray,
        y_pred: np.ndarray,
        store_in_sample_residuals: bool = False,
        random_state: int = 123,
    ) -> None:
        """
        Bin residuals according to the predicted value each residual is
        associated with. First a `skforecast.preprocessing.QuantileBinner` object
        is fitted to the predicted values. Then, residuals are binned according
        to the predicted value each residual is associated with. Residuals are
        stored in the forecaster object as `in_sample_residuals_` and
        `in_sample_residuals_by_bin_`.

        `y_true` and `y_pred` assumed to be differentiated and or transformed
        according to the attributes `differentiation` and `transformer_y`.
        The number of residuals stored per bin is limited to
        `10_000 // self.binner.n_bins_`. The total number of residuals stored is
        `10_000`.

        Args:
            y_true: True values of the time series.
            y_pred: Predicted values of the time series.
            store_in_sample_residuals: If `True`, in-sample residuals will be stored in the forecaster object
                after fitting (`in_sample_residuals_` and `in_sample_residuals_by_bin_`
                attributes). If `False`, only the intervals of the bins are stored.
            random_state: Set a seed for the random generator so that the stored sample
                residuals are always deterministic.
        """

        residuals = y_true - y_pred

        if self._probabilistic_mode == "binned":
            data = pd.DataFrame({"prediction": y_pred, "residuals": residuals})
            self.binner.fit(y_pred)
            self.binner_intervals_ = self.binner.intervals_

        if store_in_sample_residuals:
            rng = np.random.default_rng(seed=random_state)
            if self._probabilistic_mode == "binned":
                data["bin"] = self.binner.transform(y_pred).astype(int)
                self.in_sample_residuals_by_bin_ = (
                    data.groupby("bin")["residuals"].apply(np.array).to_dict()
                )

                max_sample = 10_000 // self.binner.n_bins_
                for k, v in self.in_sample_residuals_by_bin_.items():
                    if len(v) > max_sample:
                        sample = v[rng.integers(low=0, high=len(v), size=max_sample)]
                        self.in_sample_residuals_by_bin_[k] = sample

            if len(residuals) > 10_000:
                residuals = residuals[
                    rng.integers(low=0, high=len(residuals), size=10_000)
                ]

            self.in_sample_residuals_ = residuals

    def set_fit_kwargs(self, fit_kwargs: dict[str, object]) -> None:
        """
        Replace the additional keyword arguments that are forwarded to the
        estimator's `fit` method.

        The provided dict is validated/filtered by `check_select_fit_kwargs`
        before being stored on the forecaster.

        Args:
            fit_kwargs: Dict of the form {"argument": new_value}.
        """

        validated_kwargs = check_select_fit_kwargs(self.estimator, fit_kwargs=fit_kwargs)
        self.fit_kwargs = validated_kwargs

    def set_lags(
        self, lags: Union[int, List[int], np.ndarray, range, None] = None
    ) -> None:
        """
        Set new value to the attribute `lags`. Attributes `lags_names`,
        `max_lag` and `window_size` are also updated.

        Args:
            lags: Lags used as predictors. Index starts at 1, so lag 1 is equal to t-1.
                - `int`: include lags from 1 to `lags` (included).
                - `list`, `1d numpy ndarray` or `range`: include only lags present in
                `lags`, all elements must be int.
                - `None`: no lags are included as predictors.
        """

        if self.window_features is None and lags is None:
            raise ValueError(
                "At least one of the arguments `lags` or `window_features` "
                "must be different from None. This is required to create the "
                "predictors used in training the forecaster."
            )

        self.lags, self.lags_names, self.max_lag = initialize_lags(
            type(self).__name__, lags
        )
        self.window_size = max(
            [
                ws
                for ws in [self.max_lag, self.max_size_window_features]
                if ws is not None
            ]
        )
        if self.differentiation is not None:
            self.window_size += self.differentiation
            self.differentiator.set_params(window_size=self.window_size)

    def set_window_features(
        self, window_features: object | list[object] | None = None
    ) -> None:
        """
        Set new value to the attribute `window_features`.

        Attributes `max_size_window_features`, `window_features_names`,
        `window_features_class_names` and `window_size` are also updated.

        Args:
            window_features: Instance or list of instances used to create window features.
                Window features are created from the original time series and are
                included as predictors.

        Returns:
            None

        Examples:
            >>> from sklearn.linear_model import LinearRegression
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> from spotforecast2_safe.preprocessing import RollingFeatures
            >>> import pandas as pd
            >>> import numpy as np
            >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
            >>> rolling = RollingFeatures(stats=['mean', 'std'], window_sizes=[3, 5])
            >>> forecaster.set_window_features(window_features=rolling)
            >>> forecaster.window_features_names
            ['roll_mean_3', 'roll_std_3', 'roll_mean_5', 'roll_std_5']
            >>> forecaster.window_size
            5
        """

        if window_features is None and self.lags is None:
            raise ValueError(
                "At least one of the arguments `lags` or `window_features` "
                "must be different from None. This is required to create the "
                "predictors used in training the forecaster."
            )

        (
            self.window_features,
            self.window_features_names,
            self.max_size_window_features,
        ) = initialize_window_features(window_features)
        self.window_features_class_names = None
        if window_features is not None:
            self.window_features_class_names = [
                type(wf).__name__ for wf in self.window_features
            ]
        self.window_size = max(
            [
                ws
                for ws in [self.max_lag, self.max_size_window_features]
                if ws is not None
            ]
        )
        if self.differentiation is not None:
            self.window_size += self.differentiation
            self.differentiator.set_params(window_size=self.window_size)

    def get_feature_importances(self, sort_importance: bool = True) -> pd.DataFrame:
        """
        Return feature importances of the estimator stored in the forecaster.

        Only valid when the estimator exposes `feature_importances_` or `coef_`.
        Otherwise a warning is issued and `None` is returned.

        Args:
            sort_importance: If `True`, sorts the feature importances in
                descending order.

        Returns:
            pd.DataFrame: Feature importances associated with each predictor,
            or `None` if the estimator does not expose them.

        Raises:
            NotFittedError: If the forecaster is not fitted.

        Examples:
            >>> from sklearn.linear_model import LinearRegression
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> import pandas as pd
            >>> import numpy as np
            >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
            >>> forecaster.fit(y=pd.Series(np.arange(20)))
            >>> forecaster.get_feature_importances()
              feature  importance
            0   lag_1         1.0
            1   lag_2         0.0
            2   lag_3         0.0
        """

        if not self.is_fitted:
            raise NotFittedError(
                "This forecaster is not fitted yet. Call `fit` with appropriate "
                "arguments before using `get_feature_importances()`."
            )

        # For pipelines, the final step holds the fitted model.
        estimator = (
            self.estimator[-1]
            if isinstance(self.estimator, Pipeline)
            else self.estimator
        )

        if hasattr(estimator, "feature_importances_"):
            importances = estimator.feature_importances_
        elif hasattr(estimator, "coef_"):
            importances = estimator.coef_
        else:
            warnings.warn(
                f"Impossible to access feature importances for estimator of type "
                f"{type(estimator)}. This method is only valid when the "
                f"estimator stores internally the feature importances in the "
                f"attribute `feature_importances_` or `coef_`.",
                UserWarning,
            )
            importances = None

        if importances is None:
            return None

        result = pd.DataFrame(
            {
                "feature": self.X_train_features_names_out_,
                "importance": importances,
            }
        )
        if sort_importance:
            result = result.sort_values(by="importance", ascending=False)

        return result

    def set_in_sample_residuals(
        self,
        y: pd.Series,
        exog: pd.Series | pd.DataFrame | None = None,
        random_state: int = 123,
    ) -> None:
        """
        Set in-sample residuals in case they were not calculated during the
        training process.

        In-sample residuals are the difference between the true values and the
        predictions made by the forecaster on the training data. The following
        internal attributes are updated:

        + `in_sample_residuals_`: residuals stored in a numpy ndarray.
        + `binner_intervals_`: intervals used to bin the residuals, calculated
        from the quantiles of the predicted values.
        + `in_sample_residuals_by_bin_`: residuals binned according to the
        predicted value they are associated with, stored in a dictionary whose
        keys are the intervals of the predicted values and whose values are
        the residuals associated with that range.

        At most 10_000 residuals are stored in `in_sample_residuals_`; if more
        are available, a random sample of 10_000 is kept. The number of
        residuals stored per bin is limited to `10_000 // self.binner.n_bins_`.

        Args:
            y: Target time series.
            exog: Exogenous variables.
            random_state: Random state for reproducibility.

        Returns:
            None

        Raises:
            NotFittedError: If the forecaster is not fitted.
            IndexError: If the index range of `y` does not match the range
                used during training.
            ValueError: If the features generated from the provided data do not
                match those used during the training process.

        Examples:
            >>> import numpy as np
            >>> import pandas as pd
            >>> from sklearn.linear_model import LinearRegression
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
            >>> forecaster.fit(y=pd.Series(np.arange(20)), store_in_sample_residuals=False)
            >>> forecaster.set_in_sample_residuals(y=pd.Series(np.arange(20)))
            >>> forecaster.in_sample_residuals_
            array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
        """
        if not self.is_fitted:
            raise NotFittedError(
                "This forecaster is not fitted yet. Call `fit` with appropriate "
                "arguments before using `set_in_sample_residuals()`."
            )

        check_y(y=y)
        # Keep only the first and last index entries to compare against the
        # stored training range.
        full_index = check_extract_values_and_index(
            data=y, data_label="`y`", return_values=False
        )[1]
        y_index_range = full_index[[0, -1]]

        if not y_index_range.equals(self.training_range_):
            raise IndexError(
                f"The index range of `y` does not match the range "
                f"used during training. Please ensure the index is aligned "
                f"with the training data.\n"
                f"    Expected : {self.training_range_}\n"
                f"    Received : {y_index_range}"
            )

        # Recreate the training matrices; position 0 is X_train, 1 is y_train
        # and 5 is the list of output feature names.
        train_matrices = self._create_train_X_y(y=y, exog=exog)
        X_train = train_matrices[0]
        y_train = train_matrices[1]
        features_names_out = train_matrices[5]

        if features_names_out != self.X_train_features_names_out_:
            raise ValueError(
                f"Feature mismatch detected after matrix creation. The features "
                f"generated from the provided data do not match those used during "
                f"the training process. To correctly set in-sample residuals, "
                f"ensure that the same data and preprocessing steps are applied.\n"
                f"    Expected output : {self.X_train_features_names_out_}\n"
                f"    Current output  : {features_names_out}"
            )

        self._binning_in_sample_residuals(
            y_true=y_train.to_numpy(),
            y_pred=self.estimator.predict(X_train).ravel(),
            store_in_sample_residuals=True,
            random_state=random_state,
        )

    def set_out_sample_residuals(
        self,
        y_true: np.ndarray | pd.Series,
        y_pred: np.ndarray | pd.Series,
        append: bool = False,
        random_state: int = 123,
    ) -> None:
        """
        Set new values to the attribute `out_sample_residuals_`.

        Out of sample residuals are meant to be calculated using observations that
        did not participate in the training process. `y_true` and `y_pred` are
        expected to be in the original scale of the time series. Residuals are
        calculated as `y_true` - `y_pred`, after applying the necessary
        transformations and differentiations if the forecaster includes them
        (`self.transformer_y` and `self.differentiation`). Two internal attributes
        are updated:

        + `out_sample_residuals_`: residuals stored in a numpy ndarray.
        + `out_sample_residuals_by_bin_`: residuals are binned according to the
        predicted value they are associated with and stored in a dictionary, where
        the keys are the intervals of the predicted values and the values are
        the residuals associated with that range. If a bin is empty, it is filled
        with a random sample of residuals from other bins. This is done to ensure
        that all bins have at least one residual and can be used in the prediction
        process.

        A total of 10_000 residuals are stored in the attribute `out_sample_residuals_`.
        If the number of residuals is greater than 10_000, a random sample of
        10_000 residuals is stored. The number of residuals stored per bin is
        limited to `10_000 // self.binner.n_bins_`.

        Args:
            y_true: True values of the time series in the original scale.
            y_pred: Predicted values of the time series in the original scale.
            append: If `True`, new residuals are added to the once already stored
                in the forecaster. If after appending the new residuals, the limit
                of `10_000 // self.binner.n_bins_` values per bin is reached, a
                random sample of residuals is stored.
            random_state: Random state for reproducibility.

        Returns:
            None

        Raises:
            NotFittedError: If the forecaster is not fitted.
            TypeError: If `y_true` or `y_pred` are not `numpy ndarray` or `pandas Series`.
            ValueError: If `y_true` and `y_pred` have different length or index (if Series).

        Examples:
            >>> from sklearn.linear_model import LinearRegression
            >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
            >>> import pandas as pd
            >>> import numpy as np
            >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
            >>> forecaster.fit(y=pd.Series(np.arange(20)), store_in_sample_residuals=False)
            >>> y_true = np.array([20, 21, 22, 23, 24])
            >>> y_pred = np.array([20.1, 20.9, 22.2, 22.8, 24.0])
            >>> forecaster.set_out_sample_residuals(y_true=y_true, y_pred=y_pred)
            >>> forecaster.out_sample_residuals_
            array([-0.1,  0.1, -0.2,  0.2,  0. ])
        """
        if not self.is_fitted:
            raise NotFittedError(
                "This forecaster is not fitted yet. Call `fit` with appropriate "
                "arguments before using `set_out_sample_residuals()`."
            )

        if not isinstance(y_true, (np.ndarray, pd.Series)):
            raise TypeError(
                f"`y_true` argument must be `numpy ndarray` or `pandas Series`. "
                f"Got {type(y_true)}."
            )

        if not isinstance(y_pred, (np.ndarray, pd.Series)):
            raise TypeError(
                f"`y_pred` argument must be `numpy ndarray` or `pandas Series`. "
                f"Got {type(y_pred)}."
            )

        if len(y_true) != len(y_pred):
            raise ValueError(
                f"`y_true` and `y_pred` must have the same length. "
                f"Got {len(y_true)} and {len(y_pred)}."
            )

        if isinstance(y_true, pd.Series) and isinstance(y_pred, pd.Series):
            if not y_true.index.equals(y_pred.index):
                raise ValueError("`y_true` and `y_pred` must have the same index.")

        if not isinstance(y_pred, np.ndarray):
            y_pred = y_pred.to_numpy()
        if not isinstance(y_true, np.ndarray):
            y_true = y_true.to_numpy()

        if self.transformer_y:
            y_true = transform_numpy(
                array=y_true,
                transformer=self.transformer_y,
                fit=False,
                inverse_transform=False,
            )
            y_pred = transform_numpy(
                array=y_pred,
                transformer=self.transformer_y,
                fit=False,
                inverse_transform=False,
            )

        if self.differentiation is not None:
            differentiator = copy(self.differentiator)
            differentiator.set_params(window_size=None)
            y_true = differentiator.fit_transform(y_true)[self.differentiation :]
            y_pred = differentiator.fit_transform(y_pred)[self.differentiation :]

        data = pd.DataFrame(
            {"prediction": y_pred, "residuals": y_true - y_pred}
        ).dropna()
        y_pred = data["prediction"].to_numpy()
        residuals = data["residuals"].to_numpy()

        if self.binner is not None:
            data["bin"] = self.binner.transform(y_pred).astype(int)
            residuals_by_bin = (
                data.groupby("bin")["residuals"].apply(np.array).to_dict()
            )
        else:
            residuals_by_bin = {}

        out_sample_residuals = (
            np.array([])
            if self.out_sample_residuals_ is None
            else self.out_sample_residuals_
        )
        out_sample_residuals_by_bin = (
            {}
            if self.out_sample_residuals_by_bin_ is None
            else self.out_sample_residuals_by_bin_
        )
        if append:
            out_sample_residuals = np.concatenate([out_sample_residuals, residuals])
            for k, v in residuals_by_bin.items():
                if k in out_sample_residuals_by_bin:
                    out_sample_residuals_by_bin[k] = np.concatenate(
                        (out_sample_residuals_by_bin[k], v)
                    )
                else:
                    out_sample_residuals_by_bin[k] = v
        else:
            out_sample_residuals = residuals
            out_sample_residuals_by_bin = residuals_by_bin

        if self.binner is not None:
            max_samples = 10_000 // self.binner.n_bins
            rng = np.random.default_rng(seed=random_state)

            for k, v in out_sample_residuals_by_bin.items():
                if len(v) > max_samples:
                    out_sample_residuals_by_bin[k] = rng.choice(
                        v, size=max_samples, replace=False
                    )

            bin_keys = (
                [] if self.binner_intervals_ is None else self.binner_intervals_.keys()
            )
            empty_bins = [
                k
                for k in bin_keys
                if k not in out_sample_residuals_by_bin
                or len(out_sample_residuals_by_bin[k]) == 0
            ]

            if empty_bins:
                warnings.warn(
                    f"The following bins have no out of sample residuals: {empty_bins}. "
                    f"No predicted values fall in the interval "
                    f"{[self.binner_intervals_[bin] for bin in empty_bins]}. "
                    f"Empty bins will be filled with a random sample of residuals.",
                    ResidualsUsageWarning,
                )
                empty_bin_size = min(max_samples, len(out_sample_residuals))
                for k in empty_bins:
                    out_sample_residuals_by_bin[k] = rng.choice(
                        a=out_sample_residuals, size=empty_bin_size, replace=False
                    )

        self.out_sample_residuals_ = out_sample_residuals
        self.out_sample_residuals_by_bin_ = out_sample_residuals_by_bin

__repr__()

Information displayed when a ForecasterRecursive object is printed.

Returns:

str: String representation of the forecaster with key information about its configuration and state.

Examples:

>>> from sklearn.linear_model import LinearRegression
>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
>>> print(forecaster)
=========================
ForecasterRecursive
=========================
Estimator: LinearRegression
Lags: [1, 2, 3]
Window features: []
Window size: 3
Series name: None
Exogenous included: False
Exogenous names: None
Transformer for y: None
Transformer for exog: None
Weight function included: False
Differentiation order: None
Training range: None
Training index type: None
Training index frequency: None
Estimator parameters: {...}
fit_kwargs: {...}
Creation date: ...
Last fit date: None
spotforecast version: ...
Python version: ...
Forecaster id: None
Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def __repr__(self) -> str:
    """
    Human-readable summary printed for a ForecasterRecursive instance.

    Returns:
        str: Multi-line description of the forecaster configuration and,
        when fitted, its training metadata (range, index type, frequency).
    """

    header = type(self).__name__
    rule = "=" * len(header)
    estimator_params = (
        self.estimator.get_params() if hasattr(self.estimator, "get_params") else {}
    )
    # Exogenous names are only meaningful once exog was actually used in fit.
    exog_names = self.exog_names_in_ if self.exog_in_ else None
    fitted = self.is_fitted

    rows = [
        rule,
        header,
        rule,
        f"Estimator: {type(self.estimator).__name__}",
        f"Lags: {self.lags}",
        f"Window features: {self.window_features_names}",
        f"Window size: {self.window_size}",
        f"Series name: {self.series_name_in_}",
        f"Exogenous included: {self.exog_in_}",
        f"Exogenous names: {exog_names}",
        f"Transformer for y: {self.transformer_y}",
        f"Transformer for exog: {self.transformer_exog}",
        f"Weight function included: {self.weight_func is not None}",
        f"Differentiation order: {self.differentiation}",
        f"Training range: {self.training_range_.to_list() if fitted else None}",
        f"Training index type: {str(self.index_type_).split('.')[-1][:-2] if fitted else None}",
        f"Training index frequency: {self.index_freq_ if fitted else None}",
        f"Estimator parameters: {estimator_params}",
        f"fit_kwargs: {self.fit_kwargs}",
        f"Creation date: {self.creation_date}",
        f"Last fit date: {self.fit_date}",
        f"spotforecast version: {self.spotforecast_version}",
        f"Python version: {self.python_version}",
        f"Forecaster id: {self.forecaster_id}",
    ]

    # Each row is terminated with " \n" (space before newline) to reproduce
    # the historical output format byte-for-byte.
    return "".join(f"{row} \n" for row in rows)

__setstate__(state)

Custom setstate to ensure backward compatibility when unpickling. Only sets spotforecast_tags if not present, preserving custom tags.

Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def __setstate__(self, state: dict) -> None:
    """
    Restore pickled state with backward compatibility.

    Objects pickled before `__spotforecast_tags__` existed receive the
    default tag set; custom tags already present in the state are kept.
    """
    super().__setstate__(state)

    # Tags already restored from the pickle (possibly customized): keep them.
    if hasattr(self, "__spotforecast_tags__"):
        return

    self.__spotforecast_tags__ = dict(
        library="spotforecast",
        forecaster_name="ForecasterRecursive",
        forecaster_task="regression",
        forecasting_scope="single-series",
        forecasting_strategy="recursive",
        index_types_supported=["pandas.RangeIndex", "pandas.DatetimeIndex"],
        requires_index_frequency=True,
        allowed_input_types_series=["pandas.Series"],
        supports_exog=True,
        allowed_input_types_exog=["pandas.Series", "pandas.DataFrame"],
        handles_missing_values_series=False,
        handles_missing_values_exog=True,
        supports_lags=True,
        supports_window_features=True,
        supports_transformer_series=True,
        supports_transformer_exog=True,
        supports_weight_func=True,
        supports_differentiation=True,
        prediction_types=[
            "point",
            "interval",
            "bootstrapping",
            "quantiles",
            "distribution",
        ],
        supports_probabilistic=True,
        probabilistic_methods=["bootstrapping", "conformal"],
        handles_binned_residuals=True,
    )

create_predict_X(steps, last_window=None, exog=None, check_inputs=True)

Create the predictors needed to predict steps ahead. As it is a recursive process, the predictors are created at each iteration of the prediction process.

Parameters:

Name Type Description Default
steps int

Number of steps to predict. - If steps is int, number of steps to predict. - If str or pandas Datetime, the prediction will be up to that date.

required
last_window Series | DataFrame | None

Series values used to create the predictors (lags) needed in the first iteration of the prediction (t + 1). If last_window = None, the values stored in self.last_window_ are used to calculate the initial predictors, and the predictions start right after training data. Defaults to None.

None
exog Series | DataFrame | None

Exogenous variable/s included as predictor/s. Defaults to None.

None
check_inputs bool

If True, the input is checked for possible warnings and errors with the check_predict_input function. This argument is created for internal use and is not recommended to be changed. Defaults to True.

True

Returns:

Type Description
DataFrame

Pandas DataFrame with the predictors for each step. The index

DataFrame

is the same as the prediction index.

Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def create_predict_X(
    self,
    steps: int,
    last_window: pd.Series | pd.DataFrame | None = None,
    exog: pd.Series | pd.DataFrame | None = None,
    check_inputs: bool = True,
) -> pd.DataFrame:
    """
    Create the predictors needed to predict `steps` ahead. As it is a recursive
    process, the predictors are created at each iteration of the prediction
    process.

    Args:
        steps:
            Number of steps to predict.
            - If steps is int, number of steps to predict.
            - If str or pandas Datetime, the prediction will be up to that date.
        last_window:
            Series values used to create the predictors (lags) needed in the
            first iteration of the prediction (t + 1).
            If `last_window = None`, the values stored in `self.last_window_` are
            used to calculate the initial predictors, and the predictions start
            right after training data. Defaults to None.
        exog:
            Exogenous variable/s included as predictor/s. Defaults to None.
        check_inputs:
            If `True`, the input is checked for possible warnings and errors
            with the `check_predict_input` function. This argument is created
            for internal use and is not recommended to be changed.
            Defaults to True.

    Returns:
        Pandas DataFrame with the predictors for each step. The index
        is the same as the prediction index.
    """

    # Normalize inputs: resolves `steps` to an int, extracts raw last-window
    # and exog values, and builds the future index the output aligns to.
    (
        last_window_values,
        exog_values,
        prediction_index,
        steps,
    ) = self._create_predict_inputs(
        steps=steps,
        last_window=last_window,
        exog=exog,
        check_inputs=check_inputs,
    )

    # Run the recursive point predictions first: the lag/window predictors of
    # every step depend on the predictions of the previous steps.
    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore",
            message="X does not have valid feature names",
            category=UserWarning,
        )
        predictions = self._recursive_predict(
            steps=steps,
            last_window_values=last_window_values,
            exog_values=exog_values,
        )

    X_predict = []
    # Observed history followed by the recursive predictions; predictors for
    # each step are gathered from this combined 1-D array.
    full_predictors = np.concatenate((last_window_values, predictions))

    if self.lags is not None:
        # Row r (r = 0..steps-1) gathers, for each lag l, the value at
        # position len(full_predictors) - steps + r - l (fancy indexing).
        idx = np.arange(-steps, 0)[:, None] - self.lags
        X_lags = full_predictors[idx + len(full_predictors)]
        X_predict.append(X_lags)

    if self.window_features is not None:
        # Pre-allocate the (steps x n_window_features) matrix, then fill one
        # prediction step per row.
        X_window_features = np.full(
            shape=(steps, len(self.X_train_window_features_names_out_)),
            fill_value=np.nan,
            order="C",
            dtype=float,
        )
        for i in range(steps):
            # Slice [i : -(steps - i)] is the fixed-length window of
            # len(last_window_values) values ending just before step i.
            X_window_features[i, :] = np.concatenate(
                [
                    wf.transform(full_predictors[i : -(steps - i)])
                    for wf in self.window_features
                ]
            )
        X_predict.append(X_window_features)

    if exog is not None:
        X_predict.append(exog_values)

    X_predict = pd.DataFrame(
        data=np.concatenate(X_predict, axis=1),
        columns=self.X_train_features_names_out_,
        index=prediction_index,
    )

    if self.exog_in_:
        # np.concatenate upcast all columns to one dtype; restore the stored
        # exog dtypes when any of them is non-numeric or boolean.
        categorical_features = any(
            not pd.api.types.is_numeric_dtype(dtype)
            or pd.api.types.is_bool_dtype(dtype)
            for dtype in set(self.exog_dtypes_out_.values())
        )
        if categorical_features:
            X_predict = X_predict.astype(self.exog_dtypes_out_)

    if self.transformer_y is not None or self.differentiation is not None:
        # Values are deliberately left in the transformed/differentiated
        # scale; warn so downstream users do not mistake them for raw scale.
        warnings.warn(
            "The output matrix is in the transformed scale due to the "
            "inclusion of transformations or differentiation in the Forecaster. "
            "As a result, any predictions generated using this matrix will also "
            "be in the transformed scale. Please refer to the documentation "
            "for more details: "
            "https://skforecast.org/latest/user_guides/training-and-prediction-matrices.html",
            DataTransformationWarning,
        )

    return X_predict

create_sample_weights(X_train)

Create weights for each observation according to the forecaster's attribute weight_func.

Parameters:

Name Type Description Default
X_train DataFrame

Dataframe created with the create_train_X_y method, first return.

required

Returns:

Type Description
ndarray

Weights to use in fit method.

Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def create_sample_weights(self, X_train: pd.DataFrame) -> np.ndarray:
    """
    Compute per-observation training weights from `self.weight_func`.

    Args:
        X_train: Predictor DataFrame produced by `create_train_X_y`; only
            its index is passed to the weight function.

    Returns:
        Array of weights to use in `fit`, or None when no weight function
        is configured (or the function itself returned None).

    Raises:
        ValueError: If the weights contain NaN, include negative values,
            or sum to zero.
    """

    # No weight function configured: the estimator is fitted unweighted.
    if self.weight_func is None:
        return None

    sample_weight = self.weight_func(X_train.index)
    if sample_weight is None:
        return None

    # Validate the weights before handing them to the estimator.
    if np.isnan(sample_weight).any():
        raise ValueError(
            "The resulting `sample_weight` cannot have NaN values."
        )
    if np.any(sample_weight < 0):
        raise ValueError(
            "The resulting `sample_weight` cannot have negative values."
        )
    if np.sum(sample_weight) == 0:
        raise ValueError(
            "The resulting `sample_weight` cannot be normalized because "
            "the sum of the weights is zero."
        )

    return sample_weight

create_train_X_y(y, exog=None)

Public method to create training predictors and target values.

This method is a public wrapper around the internal method _create_train_X_y, which generates the training predictors and target values based on the provided time series and exogenous variables. It ensures that the necessary transformations and feature engineering steps are applied to prepare the data for training the forecaster.

Parameters:

Name Type Description Default
y Series

Target series for training. Must be a pandas Series.

required
exog Union[Series, DataFrame, None]

Optional exogenous variables for training. Can be a pandas Series or DataFrame. Must have the same index as y and cover the same time range. Defaults to None.

None

Returns:

Type Description
Tuple[DataFrame, Series]

Tuple containing: - X_train: DataFrame of training predictors including lags, window features, and exogenous variables (if provided). - y_train: Series of target values aligned with the predictors.

Examples:

>>> import numpy as np
>>> import pandas as pd
>>> from sklearn.linear_model import LinearRegression
>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> from spotforecast2_safe.preprocessing import RollingFeatures
>>> y = pd.Series(np.arange(30), name='y')
>>> exog = pd.DataFrame({'temp': np.random.randn(30)}, index=y.index)
>>> forecaster = ForecasterRecursive(
...     estimator=LinearRegression(),
...     lags=3,
...     window_features=[RollingFeatures(stats='mean', window_sizes=3)]
... )
>>> X_train, y_train = forecaster.create_train_X_y(y=y, exog=exog)
>>> isinstance(X_train, pd.DataFrame)
True
>>> isinstance(y_train, pd.Series)
True
Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def create_train_X_y(
    self, y: pd.Series, exog: Union[pd.Series, pd.DataFrame, None] = None
) -> Tuple[pd.DataFrame, pd.Series]:
    """Build the training matrix and target vector for the forecaster.

    Thin public wrapper around the internal `_create_train_X_y` pipeline,
    which applies the configured transformations and feature engineering
    (lags, window features, exogenous alignment) and returns several
    artifacts; only the first two (predictors and target) are exposed here.

    Args:
        y: Target series for training. Must be a pandas Series.
        exog: Optional exogenous variables (pandas Series or DataFrame).
            Must have the same index as `y` and cover the same time range.
            Defaults to None.

    Returns:
        Tuple `(X_train, y_train)` where `X_train` is the DataFrame of
        training predictors and `y_train` is the aligned target Series.

    Examples:
        >>> import numpy as np
        >>> import pandas as pd
        >>> from sklearn.linear_model import LinearRegression
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> y = pd.Series(np.arange(30), name='y')
        >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
        >>> X_train, y_train = forecaster.create_train_X_y(y=y)
        >>> isinstance(X_train, pd.DataFrame) and isinstance(y_train, pd.Series)
        True
    """
    # Discard the metadata outputs; callers only need the matrices.
    X_train, y_train, *_ = self._create_train_X_y(y=y, exog=exog)
    return X_train, y_train

fit(y, exog=None, store_last_window=True, store_in_sample_residuals=False, random_state=123, suppress_warnings=False)

Fit the forecaster to the training data.

Parameters:

Name Type Description Default
y Series

Target series for training. Must be a pandas Series.

required
exog Union[Series, DataFrame, None]

Optional exogenous variables for training. Can be a pandas Series or DataFrame. Must have the same index as y and cover the same time range. Defaults to None.

None
store_last_window bool

Whether to store the last window of the training series for use in prediction. Defaults to True.

True
store_in_sample_residuals bool

Whether to store in-sample residuals after fitting, which can be used for certain probabilistic prediction methods. Defaults to False.

False
random_state int

Random seed for reproducibility when sampling residuals if store_in_sample_residuals is True. Defaults to 123.

123
suppress_warnings bool

Whether to suppress warnings during fitting, such as those related to insufficient data length for lags or window features. Defaults to False.

False

Returns:

Type Description
None

None

Examples:

>>> import numpy as np
>>> import pandas as pd
>>> from sklearn.linear_model import LinearRegression
>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> from spotforecast2_safe.preprocessing import RollingFeatures
>>> y = pd.Series(np.arange(30), name='y')
>>> exog = pd.DataFrame({'temp': np.random.randn(30)}, index=y.index)
>>> forecaster = ForecasterRecursive(
...     estimator=LinearRegression(),
...     lags=3,
...     window_features=[RollingFeatures(stats='mean', window_sizes=3)]
... )
>>> forecaster.fit(y=y, exog=exog, store_in_sample_residuals=True)
Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def fit(
    self,
    y: pd.Series,
    exog: Union[pd.Series, pd.DataFrame, None] = None,
    store_last_window: bool = True,
    store_in_sample_residuals: bool = False,
    random_state: int = 123,
    suppress_warnings: bool = False,
) -> None:
    """
    Fit the forecaster to the training data.

    Args:
        y: Target series for training. Must be a pandas Series.
        exog: Optional exogenous variables for training. Can be a pandas
            Series or DataFrame. Must have the same index as `y` and cover
            the same time range. Defaults to None.
        store_last_window: Whether to store the last window of the training
            series for use in prediction. Defaults to True.
        store_in_sample_residuals: Whether to store in-sample residuals
            after fitting, which can be used for certain probabilistic
            prediction methods. Defaults to False.
        random_state: Random seed for reproducibility when sampling
            residuals if `store_in_sample_residuals` is True. Defaults to 123.
        suppress_warnings: Whether to suppress warnings during fitting, such
            as those related to insufficient data length for lags or window
            features. Defaults to False.

    Returns:
        None

    Examples:
        >>> import numpy as np
        >>> import pandas as pd
        >>> from sklearn.linear_model import LinearRegression
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> from spotforecast2_safe.preprocessing import RollingFeatures
        >>> y = pd.Series(np.arange(30), name='y')
        >>> exog = pd.DataFrame({'temp': np.random.randn(30)}, index=y.index)
        >>> forecaster = ForecasterRecursive(
        ...     estimator=LinearRegression(),
        ...     lags=3,
        ...     window_features=[RollingFeatures(stats='mean', window_sizes=3)]
        ... )
        >>> forecaster.fit(y=y, exog=exog, store_in_sample_residuals=True)
    """

    set_skforecast_warnings(suppress_warnings, action="ignore")

    # Reset values in case the forecaster has already been fitted.
    self.last_window_ = None
    self.index_type_ = None
    self.index_freq_ = None
    self.training_range_ = None
    self.series_name_in_ = None
    self.exog_in_ = False
    self.exog_names_in_ = None
    self.exog_type_in_ = None
    self.exog_dtypes_in_ = None
    self.exog_dtypes_out_ = None
    self.X_train_window_features_names_out_ = None
    self.X_train_exog_names_out_ = None
    self.X_train_features_names_out_ = None
    self.in_sample_residuals_ = None
    self.in_sample_residuals_by_bin_ = None
    self.binner_intervals_ = None
    self.is_fitted = False
    self.fit_date = None

    # Build the training matrices (lags, window features, exog) plus the
    # metadata describing the exogenous columns.
    (
        X_train,
        y_train,
        exog_names_in_,
        X_train_window_features_names_out_,
        X_train_exog_names_out_,
        X_train_features_names_out_,
        exog_dtypes_in_,
        exog_dtypes_out_,
    ) = self._create_train_X_y(y=y, exog=exog)

    sample_weight = self.create_sample_weights(X_train=X_train)

    # Only pass `sample_weight` when a weight function produced weights, so
    # estimators without sample_weight support still work in the default case.
    if sample_weight is not None:
        self.estimator.fit(
            X=X_train,
            y=y_train,
            sample_weight=sample_weight,
            **self.fit_kwargs,
        )
    else:
        self.estimator.fit(X=X_train, y=y_train, **self.fit_kwargs)

    self.X_train_window_features_names_out_ = X_train_window_features_names_out_
    self.X_train_features_names_out_ = X_train_features_names_out_

    # Record training metadata used later for input validation and for
    # building the prediction index.
    self.is_fitted = True
    self.fit_date = pd.Timestamp.today().strftime("%Y-%m-%d %H:%M:%S")
    self.training_range_ = y.index[[0, -1]]
    self.index_type_ = type(y.index)
    if isinstance(y.index, pd.DatetimeIndex):
        self.index_freq_ = y.index.freqstr
    else:
        # RangeIndex exposes `step`; index types without it fall back to None.
        try:
            self.index_freq_ = y.index.step
        except AttributeError:
            self.index_freq_ = None

    if exog is not None:
        self.exog_in_ = True
        self.exog_type_in_ = type(exog)
        self.exog_names_in_ = exog_names_in_
        self.exog_dtypes_in_ = exog_dtypes_in_
        self.exog_dtypes_out_ = exog_dtypes_out_
        self.X_train_exog_names_out_ = X_train_exog_names_out_

    self.series_name_in_ = y.name if y.name is not None else "y"

    # NOTE: This is done to save time during fit in functions such as backtesting()
    if self._probabilistic_mode is not False:
        self._binning_in_sample_residuals(
            y_true=y_train.to_numpy(),
            y_pred=self.estimator.predict(X_train).ravel(),
            store_in_sample_residuals=store_in_sample_residuals,
            random_state=random_state,
        )

    if store_last_window:
        # Keep the most recent `window_size` observations so `predict` can
        # build the first iteration's lags without an explicit last window.
        self.last_window_ = (
            y.iloc[-self.window_size :]
            .copy()
            .to_frame(name=y.name if y.name is not None else "y")
        )

    set_skforecast_warnings(suppress_warnings, action="default")

get_feature_importances(sort_importance=True)

Return feature importances of the estimator stored in the forecaster. Only valid when estimator stores internally the feature importances in the attribute feature_importances_ or coef_. Otherwise, returns None.

Parameters:

Name Type Description Default
sort_importance bool

If True, sorts the feature importances in descending order.

True

Returns:

Type Description
DataFrame

pd.DataFrame: Feature importances associated with each predictor.

Raises:

Type Description
NotFittedError

If the forecaster is not fitted.

Examples:

>>> from sklearn.linear_model import LinearRegression
>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> import pandas as pd
>>> import numpy as np
>>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
>>> forecaster.fit(y=pd.Series(np.arange(20)))
>>> forecaster.get_feature_importances()
  feature  importance
0   lag_1         1.0
1   lag_2         0.0
2   lag_3         0.0
Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def get_feature_importances(self, sort_importance: bool = True) -> pd.DataFrame:
    """
    Return the fitted estimator's feature importances as a DataFrame.

    Only valid when the underlying estimator exposes `feature_importances_`
    or `coef_`; otherwise a warning is issued and None is returned. For a
    Pipeline estimator, the final step is inspected.

    Args:
        sort_importance: If `True`, sort rows by importance in descending
            order.

    Returns:
        pd.DataFrame: Columns `feature` and `importance`, one row per
        predictor, or None when no importances are available.

    Raises:
        NotFittedError: If the forecaster is not fitted.

    Examples:
        >>> from sklearn.linear_model import LinearRegression
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> import pandas as pd
        >>> import numpy as np
        >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
        >>> forecaster.fit(y=pd.Series(np.arange(20)))
        >>> forecaster.get_feature_importances()
          feature  importance
        0   lag_1         1.0
        1   lag_2         0.0
        2   lag_3         0.0
    """

    if not self.is_fitted:
        raise NotFittedError(
            "This forecaster is not fitted yet. Call `fit` with appropriate "
            "arguments before using `get_feature_importances()`."
        )

    # For pipelines, the importances live on the final step.
    estimator = (
        self.estimator[-1] if isinstance(self.estimator, Pipeline) else self.estimator
    )

    if hasattr(estimator, "feature_importances_"):
        importances = estimator.feature_importances_
    elif hasattr(estimator, "coef_"):
        importances = estimator.coef_
    else:
        warnings.warn(
            f"Impossible to access feature importances for estimator of type "
            f"{type(estimator)}. This method is only valid when the "
            f"estimator stores internally the feature importances in the "
            f"attribute `feature_importances_` or `coef_`.",
            UserWarning,
        )
        importances = None

    if importances is None:
        return importances

    table = pd.DataFrame(
        {
            "feature": self.X_train_features_names_out_,
            "importance": importances,
        }
    )
    if sort_importance:
        table = table.sort_values(by="importance", ascending=False)

    return table

get_params(deep=True)

Get parameters for this forecaster.

Parameters:

Name Type Description Default
deep bool

If True, will return the parameters for this forecaster and contained sub-objects that are estimators.

True

Returns:

Name Type Description
params Dict[str, object]

Dictionary of parameter names mapped to their values.

Examples:

>>> from sklearn.linear_model import LinearRegression
>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
>>> forecaster.get_params()
{
    'estimator': LinearRegression(), 'lags': 3, 'window_features': None,
    'transformer_y': None, 'transformer_exog': None, 'weight_func': None,
    'differentiation': None, 'fit_kwargs': {}, 'binner_kwargs': None, 'forecaster_id': '...'}
Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def get_params(self, deep: bool = True) -> Dict[str, object]:
    """
    Get parameters for this forecaster.

    Args:
        deep: If True, also include the parameters of the contained
            estimator, prefixed with `estimator__`.

    Returns:
        params: Dictionary mapping parameter names to their values.

    Examples:
        >>> from sklearn.linear_model import LinearRegression
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
        >>> sorted(forecaster.get_params(deep=False))  # doctest: +ELLIPSIS
        ['binner_kwargs', 'differentiation', 'estimator', ...]
    """
    # Top-level constructor parameters; `hasattr` guards against partially
    # constructed or legacy-unpickled instances.
    attribute_names = (
        "estimator",
        "lags",
        "window_features",
        "transformer_y",
        "transformer_exog",
        "weight_func",
        "differentiation",
        "fit_kwargs",
        "binner_kwargs",
        "forecaster_id",
    )
    params = {
        name: getattr(self, name)
        for name in attribute_names
        if hasattr(self, name)
    }

    if deep:
        estimator = getattr(self, "estimator", None)
        if estimator is not None and hasattr(estimator, "get_params"):
            # Flatten nested estimator params with the sklearn-style prefix.
            for inner_key, inner_value in estimator.get_params(deep=True).items():
                params[f"estimator__{inner_key}"] = inner_value

    return params

predict(steps, last_window=None, exog=None, check_inputs=True)

Predict future values recursively for the specified number of steps.

Parameters:

Name Type Description Default
steps int | str | Timestamp

Number of future steps to predict.

required
last_window Union[Series, DataFrame, None]

Optional last window of observed values to use for prediction. If None, uses the last window from training. Must be a pandas Series or DataFrame with the same structure as the training target series. Defaults to None.

None
exog Union[Series, DataFrame, None]

Optional exogenous variables for prediction. Can be a pandas Series or DataFrame. Must have the same structure as the exogenous variables used in training. Defaults to None.

None
check_inputs bool

Whether to perform input validation checks. Defaults to True.

True

Returns:

Type Description
Series

Pandas Series of predicted values for the specified number of steps,

Series

indexed according to the prediction index constructed from the last window and the number of steps.

Examples:

>>> import numpy as np
>>> import pandas as pd
>>> from sklearn.linear_model import LinearRegression
>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> from spotforecast2_safe.preprocessing import RollingFeatures
>>> y = pd.Series(np.arange(30), name='y')
>>> exog = pd.DataFrame({'temp': np.random.randn(30)}, index=y.index)
>>> forecaster = ForecasterRecursive(
...     estimator=LinearRegression(),
...     lags=3,
...     window_features=[RollingFeatures(stats='mean', window_sizes=3)]
... )
>>> forecaster.fit(y=y, exog=exog)
>>> last_window = y.iloc[-3:]
>>> exog_future = pd.DataFrame({'temp': np.random.randn(5)}, index=pd.RangeIndex(start=30, stop=35))
>>> predictions = forecaster.predict(
...     steps=5, last_window=last_window, exog=exog_future, check_inputs=True
... )
>>> isinstance(predictions, pd.Series)
True
Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def predict(
    self,
    steps: int | str | pd.Timestamp,
    last_window: Union[pd.Series, pd.DataFrame, None] = None,
    exog: Union[pd.Series, pd.DataFrame, None] = None,
    check_inputs: bool = True,
) -> pd.Series:
    """
    Predict future values recursively for the specified number of steps.

    Args:
        steps:
            Number of future steps to predict.
        last_window:
            Optional last window of observed values to use for prediction. If None, uses the last window from training.
            Must be a pandas Series or DataFrame with the same structure as the training target series. Defaults to None.
        exog:
            Optional exogenous variables for prediction. Can be a pandas Series or DataFrame.
            Must have the same structure as the exogenous variables used in training. Defaults to None.
        check_inputs:
            Whether to perform input validation checks. Defaults to True.

    Returns:
        Pandas Series of predicted values for the specified number of steps,
        indexed according to the prediction index constructed from the last window and the number of steps.

    Examples:
        >>> import numpy as np
        >>> import pandas as pd
        >>> from sklearn.linear_model import LinearRegression
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> from spotforecast2_safe.preprocessing import RollingFeatures
        >>> y = pd.Series(np.arange(30), name='y')
        >>> exog = pd.DataFrame({'temp': np.random.randn(30)}, index=y.index)
        >>> forecaster = ForecasterRecursive(
        ...     estimator=LinearRegression(),
        ...     lags=3,
        ...     window_features=[RollingFeatures(stats='mean', window_sizes=3)]
        ... )
        >>> forecaster.fit(y=y, exog=exog)
        >>> last_window = y.iloc[-3:]
        >>> exog_future = pd.DataFrame({'temp': np.random.randn(5)}, index=pd.RangeIndex(start=30, stop=35))
        >>> predictions = forecaster.predict(
        ...     steps=5, last_window=last_window, exog=exog_future, check_inputs=True
        ... )
        >>> isinstance(predictions, pd.Series)
        True
    """

    last_window_values, exog_values, prediction_index, steps = (
        self._create_predict_inputs(
            steps=steps,
            last_window=last_window,
            exog=exog,
            check_inputs=check_inputs,
        )
    )

    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore",
            message="X does not have valid feature names",
            category=UserWarning,
        )
        predictions = self._recursive_predict(
            steps=steps,
            last_window_values=last_window_values,
            exog_values=exog_values,
        )

    if self.differentiation is not None:
        predictions = self.differentiator.inverse_transform_next_window(predictions)

    predictions = transform_numpy(
        array=predictions,
        transformer=self.transformer_y,
        fit=False,
        inverse_transform=True,
    )

    predictions = pd.Series(data=predictions, index=prediction_index, name="pred")

    return predictions

predict_bootstrapping(steps, last_window=None, exog=None, n_boot=250, use_in_sample_residuals=True, use_binned_residuals=True, random_state=123)

Generate multiple forecasting predictions using a bootstrapping process. By sampling from a collection of past observed errors (the residuals), each iteration of bootstrapping generates a different set of predictions. See the References section for more information.

Parameters:

Name Type Description Default
steps int | str | Timestamp

Number of steps to predict. - If steps is int, number of steps to predict. - If str or pandas Datetime, the prediction will be up to that date.

required
last_window Series | DataFrame | None

Series values used to create the predictors (lags) needed in the first iteration of the prediction (t + 1). If last_window = None, the values stored in self.last_window_ are used to calculate the initial predictors, and the predictions start right after training data. Defaults to None.

None
exog Series | DataFrame | None

Exogenous variable/s included as predictor/s. Defaults to None.

None
n_boot int

Number of bootstrapping iterations to perform when estimating prediction intervals. Defaults to 250.

250
use_in_sample_residuals bool

If True, residuals from the training data are used as proxy of prediction error to create predictions. If False, out of sample residuals (calibration) are used. Out-of-sample residuals must be precomputed using Forecaster's set_out_sample_residuals() method. Defaults to True.

True
use_binned_residuals bool

If True, residuals are selected based on the predicted values (binned selection). If False, residuals are selected randomly. Defaults to True.

True
random_state int

Seed for the random number generator to ensure reproducibility. Defaults to 123.

123

Returns:

Type Description
DataFrame

Pandas DataFrame with predictions generated by bootstrapping. Shape: (steps, n_boot).

Raises:

Type Description
ValueError

If steps is not an integer or a valid date.

ValueError

If exog is missing or has invalid shape.

ValueError

If n_boot is not a positive integer.

ValueError

If use_in_sample_residuals=True and in_sample_residuals_ are not available.

ValueError

If use_in_sample_residuals=False and out_sample_residuals_ are not available.

Examples:

>>> import numpy as np
>>> import pandas as pd
>>> from sklearn.linear_model import LinearRegression
>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> rng = np.random.default_rng(123)
>>> y = pd.Series(rng.normal(size=100), name='y')
>>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
>>> _ = forecaster.fit(y=y)
>>> boot_preds = forecaster.predict_bootstrapping(steps=3, n_boot=5)
>>> boot_preds.shape
(3, 5)
References

.. [1] Forecasting: Principles and Practice (3rd ed) Rob J Hyndman and George Athanasopoulos. https://otexts.com/fpp3/prediction-intervals.html

Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def predict_bootstrapping(
    self,
    steps: int | str | pd.Timestamp,
    last_window: pd.Series | pd.DataFrame | None = None,
    exog: pd.Series | pd.DataFrame | None = None,
    n_boot: int = 250,
    use_in_sample_residuals: bool = True,
    use_binned_residuals: bool = True,
    random_state: int = 123,
) -> pd.DataFrame:
    """
    Generate multiple forecasting predictions using a bootstrapping process.
    By sampling from a collection of past observed errors (the residuals),
    each iteration of bootstrapping generates a different set of predictions.
    See the References section for more information.

    Args:
        steps:
            Number of steps to predict.
            - If steps is int, number of steps to predict.
            - If str or pandas Datetime, the prediction will be up to that date.
        last_window:
            Series values used to create the predictors (lags) needed in the
            first iteration of the prediction (t + 1).
            If `last_window = None`, the values stored in `self.last_window_` are
            used to calculate the initial predictors, and the predictions start
            right after training data. Defaults to None.
        exog:
            Exogenous variable/s included as predictor/s. Defaults to None.
        n_boot:
            Number of bootstrapping iterations to perform when estimating prediction
            intervals. Defaults to 250.
        use_in_sample_residuals:
            If `True`, residuals from the training data are used as proxy of
            prediction error to create predictions.
            If `False`, out of sample residuals (calibration) are used.
            Out-of-sample residuals must be precomputed using Forecaster's
            `set_out_sample_residuals()` method. Defaults to True.
        use_binned_residuals:
            If `True`, residuals are selected based on the predicted values
            (binned selection).
            If `False`, residuals are selected randomly. Defaults to True.
        random_state:
            Seed for the random number generator to ensure reproducibility. Defaults to 123.

    Returns:
        Pandas DataFrame with predictions generated by bootstrapping. Shape: (steps, n_boot).

    Raises:
        ValueError:
            If `steps` is not an integer or a valid date.
        ValueError:
            If `exog` is missing or has invalid shape.
        ValueError:
            If `n_boot` is not a positive integer.
        ValueError:
            If `use_in_sample_residuals=True` and `in_sample_residuals_` are not available.
        ValueError:
            If `use_in_sample_residuals=False` and `out_sample_residuals_` are not available.

    Examples:
        >>> import numpy as np
        >>> import pandas as pd
        >>> from sklearn.linear_model import LinearRegression
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> rng = np.random.default_rng(123)
        >>> y = pd.Series(rng.normal(size=100), name='y')
        >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
        >>> _ = forecaster.fit(y=y)
        >>> boot_preds = forecaster.predict_bootstrapping(steps=3, n_boot=5)
        >>> boot_preds.shape
        (3, 5)

    References:
        .. [1] Forecasting: Principles and Practice (3rd ed) Rob J Hyndman and George Athanasopoulos.
               https://otexts.com/fpp3/prediction-intervals.html
    """

    (
        last_window_values,
        exog_values,
        prediction_index,
        steps,
    ) = self._create_predict_inputs(
        steps=steps,
        last_window=last_window,
        exog=exog,
        predict_probabilistic=True,
        use_in_sample_residuals=use_in_sample_residuals,
        use_binned_residuals=use_binned_residuals,
        check_inputs=True,
    )

    if use_in_sample_residuals:
        residuals = self.in_sample_residuals_
        residuals_by_bin = self.in_sample_residuals_by_bin_
    else:
        residuals = self.out_sample_residuals_
        residuals_by_bin = self.out_sample_residuals_by_bin_

    rng = np.random.default_rng(seed=random_state)
    if use_binned_residuals:
        # Create 3D array with sampled residuals: (n_bins, steps, n_boot)
        n_bins = len(residuals_by_bin)
        sampled_residuals = np.stack(
            [
                residuals_by_bin[k][
                    rng.integers(
                        low=0, high=len(residuals_by_bin[k]), size=(steps, n_boot)
                    )
                ]
                for k in range(n_bins)
            ],
            axis=0,
        )
    else:
        sampled_residuals = residuals[
            rng.integers(low=0, high=len(residuals), size=(steps, n_boot))
        ]

    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore",
            message="X does not have valid feature names",
            category=UserWarning,
        )
        boot_predictions = self._recursive_predict_bootstrapping(
            steps=steps,
            last_window_values=last_window_values,
            exog_values=exog_values,
            sampled_residuals=sampled_residuals,
            use_binned_residuals=use_binned_residuals,
            n_boot=n_boot,
        )

    if self.differentiation is not None:
        boot_predictions = self.differentiator.inverse_transform_next_window(
            boot_predictions
        )

    if self.transformer_y:
        boot_predictions = transform_numpy(
            array=boot_predictions,
            transformer=self.transformer_y,
            fit=False,
            inverse_transform=True,
        )

    boot_columns = [f"pred_boot_{i}" for i in range(n_boot)]
    boot_predictions = pd.DataFrame(
        data=boot_predictions, index=prediction_index, columns=boot_columns
    )

    return boot_predictions

predict_dist(steps, distribution, last_window=None, exog=None, n_boot=250, use_in_sample_residuals=True, use_binned_residuals=True, random_state=123)

Fit a given probability distribution for each step. After generating multiple forecasting predictions through a bootstrapping process, each step is fitted to the given distribution.

Parameters:

Name Type Description Default
steps int | str | Timestamp

Number of steps to predict. - If steps is int, number of steps to predict. - If str or pandas Datetime, the prediction will be up to that date.

required
distribution object

A distribution object from scipy.stats with methods _pdf and fit. For example scipy.stats.norm.

required
last_window Series | DataFrame | None

Series values used to create the predictors (lags) needed in the first iteration of the prediction (t + 1). If last_window = None, the values stored in self.last_window_ are used to calculate the initial predictors, and the predictions start right after training data.

None
exog Series | DataFrame | None

Exogenous variable/s included as predictor/s.

None
n_boot int

Number of bootstrapping iterations to perform when estimating prediction intervals.

250
use_in_sample_residuals bool

If True, residuals from the training data are used as proxy of prediction error to create predictions. If False, out of sample residuals (calibration) are used. Out-of-sample residuals must be precomputed using Forecaster's set_out_sample_residuals() method.

True
use_binned_residuals bool

If True, residuals are selected based on the predicted values (binned selection). If False, residuals are selected randomly.

True
random_state int

Seed for the random number generator to ensure reproducibility.

123

Returns:

Type Description
DataFrame

Distribution parameters estimated for each step.

Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def predict_dist(
    self,
    steps: int | str | pd.Timestamp,
    distribution: object,
    last_window: pd.Series | pd.DataFrame | None = None,
    exog: pd.Series | pd.DataFrame | None = None,
    n_boot: int = 250,
    use_in_sample_residuals: bool = True,
    use_binned_residuals: bool = True,
    random_state: int = 123,
) -> pd.DataFrame:
    """
    Fit a given probability distribution for each step. After generating
    multiple forecasting predictions through a bootstrapping process, each
    step is fitted to the given distribution.

    Args:
        steps: Number of steps to predict.
            - If steps is int, number of steps to predict.
            - If str or pandas Datetime, the prediction will be up to that date.
        distribution: A distribution object from scipy.stats with methods `_pdf` and `fit`.
            For example scipy.stats.norm.
        last_window: Series values used to create the predictors (lags) needed in the
            first iteration of the prediction (t + 1).
            If `last_window = None`, the values stored in` self.last_window_` are
            used to calculate the initial predictors, and the predictions start
            right after training data.
        exog: Exogenous variable/s included as predictor/s.
        n_boot: Number of bootstrapping iterations to perform when estimating prediction
            intervals.
        use_in_sample_residuals: If `True`, residuals from the training data are used as proxy of
            prediction error to create predictions.
            If `False`, out of sample residuals (calibration) are used.
            Out-of-sample residuals must be precomputed using Forecaster's
            `set_out_sample_residuals()` method.
        use_binned_residuals: If `True`, residuals are selected based on the predicted values
            (binned selection).
            If `False`, residuals are selected randomly.
        random_state: Seed for the random number generator to ensure reproducibility.

    Returns:
        Distribution parameters estimated for each step.
    """

    if not hasattr(distribution, "_pdf") or not callable(
        getattr(distribution, "fit", None)
    ):
        raise TypeError(
            "`distribution` must be a valid probability distribution object "
            "from scipy.stats, with methods `_pdf` and `fit`."
        )

    predictions = self.predict_bootstrapping(
        steps=steps,
        last_window=last_window,
        exog=exog,
        n_boot=n_boot,
        random_state=random_state,
        use_in_sample_residuals=use_in_sample_residuals,
        use_binned_residuals=use_binned_residuals,
    )

    param_names = [
        p for p in inspect.signature(distribution._pdf).parameters if not p == "x"
    ] + ["loc", "scale"]

    predictions[param_names] = predictions.apply(
        lambda x: distribution.fit(x), axis=1, result_type="expand"
    )
    predictions = predictions[param_names]

    return predictions

predict_interval(steps, last_window=None, exog=None, method='bootstrapping', interval=[5, 95], n_boot=250, use_in_sample_residuals=True, use_binned_residuals=True, random_state=123)

Predict n steps ahead and estimate prediction intervals using either bootstrapping or conformal prediction methods. Refer to the References section for additional details on these methods.

Parameters:

Name Type Description Default
steps int | str | Timestamp

Number of steps to predict. - If steps is int, number of steps to predict. - If str or pandas Datetime, the prediction will be up to that date.

required
last_window Series | DataFrame | None

Series values used to create the predictors (lags) needed in the first iteration of the prediction (t + 1). If last_window = None, the values stored in self.last_window_ are used to calculate the initial predictors, and the predictions start right after training data. Defaults to None.

None
exog Series | DataFrame | None

Exogenous variable/s included as predictor/s. Defaults to None.

None
method str

Technique used to estimate prediction intervals. Available options: - 'bootstrapping': Bootstrapping is used to generate prediction intervals [1]. - 'conformal': Employs the conformal prediction split method for interval estimation [2]. Defaults to 'bootstrapping'.

'bootstrapping'
interval float | list[float] | tuple[float]

Confidence level of the prediction interval. Interpretation depends on the method used: - If float, represents the nominal (expected) coverage (between 0 and 1). For instance, interval=0.95 corresponds to [2.5, 97.5] percentiles. - If list or tuple, defines the exact percentiles to compute, which must be between 0 and 100 inclusive. For example, interval of 95% should be as interval = [2.5, 97.5]. - When using method='conformal', the interval must be a float or a list/tuple defining a symmetric interval. Defaults to [5, 95].

[5, 95]
n_boot int

Number of bootstrapping iterations to perform when estimating prediction intervals. Defaults to 250.

250
use_in_sample_residuals bool

If True, residuals from the training data are used as proxy of prediction error to create predictions. If False, out of sample residuals (calibration) are used. Out-of-sample residuals must be precomputed using Forecaster's set_out_sample_residuals() method. Defaults to True.

True
use_binned_residuals bool

If True, residuals are selected based on the predicted values (binned selection). If False, residuals are selected randomly. Defaults to True.

True
random_state int

Seed for the random number generator to ensure reproducibility. Defaults to 123.

123

Returns:

Type Description
DataFrame

Pandas DataFrame with values predicted by the forecaster and their estimated interval.

DataFrame
  • pred: predictions.
DataFrame
  • lower_bound: lower bound of the interval.
DataFrame
  • upper_bound: upper bound of the interval.

Raises:

Type Description
ValueError

If method is not 'bootstrapping' or 'conformal'.

ValueError

If interval is invalid or not compatible with the chosen method.

ValueError

If inputs (steps, exog, etc.) are invalid.

Examples:

>>> import numpy as np
>>> import pandas as pd
>>> from sklearn.linear_model import LinearRegression
>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> rng = np.random.default_rng(123)
>>> y = pd.Series(rng.normal(size=100), name='y')
>>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
>>> _ = forecaster.fit(y=y)
>>> # Bootstrapping method
>>> intervals_boot = forecaster.predict_interval(
...     steps=3, method='bootstrapping', interval=[5, 95]
... )
>>> intervals_boot.columns.tolist()
['pred', 'lower_bound', 'upper_bound']
>>> # Conformal method
>>> intervals_conf = forecaster.predict_interval(
...     steps=3, method='conformal', interval=0.95
... )
>>> intervals_conf.columns.tolist()
['pred', 'lower_bound', 'upper_bound']
References

.. [1] Forecasting: Principles and Practice (3rd ed) Rob J Hyndman and George Athanasopoulos. https://otexts.com/fpp3/prediction-intervals.html .. [2] MAPIE - Model Agnostic Prediction Interval Estimator. https://mapie.readthedocs.io/en/stable/theoretical_description_regression.html#the-split-method

Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def predict_interval(
    self,
    steps: int | str | pd.Timestamp,
    last_window: pd.Series | pd.DataFrame | None = None,
    exog: pd.Series | pd.DataFrame | None = None,
    method: str = "bootstrapping",
    interval: float | list[float] | tuple[float, ...] = (5, 95),
    n_boot: int = 250,
    use_in_sample_residuals: bool = True,
    use_binned_residuals: bool = True,
    random_state: int = 123,
) -> pd.DataFrame:
    """
    Predict n steps ahead and estimate prediction intervals using either
    bootstrapping or conformal prediction methods. Refer to the References
    section for additional details on these methods.

    Args:
        steps:
            Number of steps to predict.
            - If steps is int, number of steps to predict.
            - If str or pandas Datetime, the prediction will be up to that date.
        last_window:
            Series values used to create the predictors (lags) needed in the
            first iteration of the prediction (t + 1).
            If `last_window = None`, the values stored in `self.last_window_` are
            used to calculate the initial predictors, and the predictions start
            right after training data. Defaults to None.
        exog:
            Exogenous variable/s included as predictor/s. Defaults to None.
        method:
            Technique used to estimate prediction intervals. Available options:
            - 'bootstrapping': Bootstrapping is used to generate prediction
              intervals [1]_.
            - 'conformal': Employs the conformal prediction split method for
              interval estimation [2]_.
            Defaults to 'bootstrapping'.
        interval:
            Confidence level of the prediction interval. Interpretation depends
            on the method used:
            - If `float`, represents the nominal (expected) coverage (between 0
              and 1). For instance, `interval=0.95` corresponds to `[2.5, 97.5]`
              percentiles.
            - If `list` or `tuple`, defines the exact percentiles to compute, which
              must be between 0 and 100 inclusive. For example, interval
              of 95% should be as `interval = [2.5, 97.5]`.
            - When using `method='conformal'`, the interval must be a float or
              a list/tuple defining a symmetric interval.
            Defaults to (5, 95).
        n_boot:
            Number of bootstrapping iterations to perform when estimating prediction
            intervals. Defaults to 250.
        use_in_sample_residuals:
            If `True`, residuals from the training data are used as proxy of
            prediction error to create predictions.
            If `False`, out of sample residuals (calibration) are used.
            Out-of-sample residuals must be precomputed using Forecaster's
            `set_out_sample_residuals()` method. Defaults to True.
        use_binned_residuals:
            If `True`, residuals are selected based on the predicted values
            (binned selection).
            If `False`, residuals are selected randomly. Defaults to True.
        random_state:
            Seed for the random number generator to ensure reproducibility. Defaults to 123.

    Returns:
        Pandas DataFrame with values predicted by the forecaster and their estimated interval.
        - pred: predictions.
        - lower_bound: lower bound of the interval.
        - upper_bound: upper bound of the interval.

    Raises:
        ValueError:
            If `method` is not 'bootstrapping' or 'conformal'.
        ValueError:
            If `interval` is invalid or not compatible with the chosen method.
        ValueError:
            If inputs (`steps`, `exog`, etc.) are invalid.

    Examples:
        >>> import numpy as np
        >>> import pandas as pd
        >>> from sklearn.linear_model import LinearRegression
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> rng = np.random.default_rng(123)
        >>> y = pd.Series(rng.normal(size=100), name='y')
        >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
        >>> _ = forecaster.fit(y=y)
        >>> # Bootstrapping method
        >>> intervals_boot = forecaster.predict_interval(
        ...     steps=3, method='bootstrapping', interval=[5, 95]
        ... )
        >>> intervals_boot.columns.tolist()
        ['pred', 'lower_bound', 'upper_bound']

        >>> # Conformal method
        >>> intervals_conf = forecaster.predict_interval(
        ...     steps=3, method='conformal', interval=0.95
        ... )
        >>> intervals_conf.columns.tolist()
        ['pred', 'lower_bound', 'upper_bound']

    References:
        .. [1] Forecasting: Principles and Practice (3rd ed) Rob J Hyndman and George Athanasopoulos.
               https://otexts.com/fpp3/prediction-intervals.html
        .. [2] MAPIE - Model Agnostic Prediction Interval Estimator.
               https://mapie.readthedocs.io/en/stable/theoretical_description_regression.html#the-split-method
    """

    # Guard clause: fail fast on an unknown method before any other work.
    if method not in ("bootstrapping", "conformal"):
        raise ValueError(
            f"Invalid `method` '{method}'. Choose 'bootstrapping' or 'conformal'."
        )

    if method == "bootstrapping":

        # Normalize `interval` to a pair of quantiles in [0, 1].
        if isinstance(interval, (list, tuple)):
            check_interval(interval=interval, ensure_symmetric_intervals=False)
            interval = np.array(interval) / 100
        else:
            check_interval(alpha=interval, alpha_literal="interval")
            interval = np.array([0.5 - interval / 2, 0.5 + interval / 2])

        boot_predictions = self.predict_bootstrapping(
            steps=steps,
            last_window=last_window,
            exog=exog,
            n_boot=n_boot,
            random_state=random_state,
            use_in_sample_residuals=use_in_sample_residuals,
            use_binned_residuals=use_binned_residuals,
        )

        # Inputs were already validated inside predict_bootstrapping.
        predictions = self.predict(
            steps=steps, last_window=last_window, exog=exog, check_inputs=False
        )

        predictions_interval = boot_predictions.quantile(
            q=interval, axis=1
        ).transpose()
        predictions_interval.columns = ["lower_bound", "upper_bound"]
        predictions = pd.concat((predictions, predictions_interval), axis=1)

    else:  # method == "conformal"

        # Conformal intervals require a symmetric interval specification.
        if isinstance(interval, (list, tuple)):
            check_interval(interval=interval, ensure_symmetric_intervals=True)
            nominal_coverage = (interval[1] - interval[0]) / 100
        else:
            check_interval(alpha=interval, alpha_literal="interval")
            nominal_coverage = interval

        predictions = self._predict_interval_conformal(
            steps=steps,
            last_window=last_window,
            exog=exog,
            nominal_coverage=nominal_coverage,
            use_in_sample_residuals=use_in_sample_residuals,
            use_binned_residuals=use_binned_residuals,
        )

    return predictions

predict_quantiles(steps, last_window=None, exog=None, quantiles=[0.05, 0.5, 0.95], n_boot=250, use_in_sample_residuals=True, use_binned_residuals=True, random_state=123)

Calculate the specified quantiles for each step. After generating multiple forecasting predictions through a bootstrapping process, each quantile is calculated for each step.

Parameters:

Name Type Description Default
steps int | str | Timestamp

Number of steps to predict. - If steps is int, number of steps to predict. - If str or pandas Datetime, the prediction will be up to that date.

required
last_window Series | DataFrame | None

Series values used to create the predictors (lags) needed in the first iteration of the prediction (t + 1). If last_window = None, the values stored in self.last_window_ are used to calculate the initial predictors, and the predictions start right after training data.

None
exog Series | DataFrame | None

Exogenous variable/s included as predictor/s.

None
quantiles list[float] | tuple[float]

Sequence of quantiles to compute, which must be between 0 and 1 inclusive. For example, quantiles of 0.05, 0.5 and 0.95 should be as quantiles = [0.05, 0.5, 0.95].

[0.05, 0.5, 0.95]
n_boot int

Number of bootstrapping iterations to perform when estimating quantiles.

250
use_in_sample_residuals bool

If True, residuals from the training data are used as proxy of prediction error to create predictions. If False, out of sample residuals (calibration) are used. Out-of-sample residuals must be precomputed using Forecaster's set_out_sample_residuals() method.

True
use_binned_residuals bool

If True, residuals are selected based on the predicted values (binned selection). If False, residuals are selected randomly.

True
random_state int

Seed for the random number generator to ensure reproducibility.

123

Returns:

Type Description
DataFrame

Quantiles predicted by the forecaster.

Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def predict_quantiles(
    self,
    steps: int | str | pd.Timestamp,
    last_window: pd.Series | pd.DataFrame | None = None,
    exog: pd.Series | pd.DataFrame | None = None,
    quantiles: list[float] | tuple[float, ...] = (0.05, 0.5, 0.95),
    n_boot: int = 250,
    use_in_sample_residuals: bool = True,
    use_binned_residuals: bool = True,
    random_state: int = 123,
) -> pd.DataFrame:
    """
    Calculate the specified quantiles for each step. After generating
    multiple forecasting predictions through a bootstrapping process, each
    quantile is calculated for each step.

    Args:
        steps: Number of steps to predict.
            - If steps is int, number of steps to predict.
            - If str or pandas Datetime, the prediction will be up to that date.
        last_window: Series values used to create the predictors (lags) needed in the
            first iteration of the prediction (t + 1).
            If `last_window = None`, the values stored in `self.last_window_` are
            used to calculate the initial predictors, and the predictions start
            right after training data.
        exog: Exogenous variable/s included as predictor/s.
        quantiles: Sequence of quantiles to compute, which must be between 0 and 1
            inclusive. For example, quantiles of 0.05, 0.5 and 0.95 should be as
            `quantiles = (0.05, 0.5, 0.95)`.
        n_boot: Number of bootstrapping iterations to perform when estimating quantiles.
        use_in_sample_residuals: If `True`, residuals from the training data are used as proxy of
            prediction error to create predictions.
            If `False`, out of sample residuals (calibration) are used.
            Out-of-sample residuals must be precomputed using Forecaster's
            `set_out_sample_residuals()` method.
        use_binned_residuals: If `True`, residuals are selected based on the predicted values
            (binned selection).
            If `False`, residuals are selected randomly.
        random_state: Seed for the random number generator to ensure reproducibility.

    Returns:
        Quantiles predicted by the forecaster.
    """

    check_interval(quantiles=quantiles)
    # Normalize to a list: the default is an immutable tuple (avoids the
    # mutable-default-argument pitfall) but downstream code expects list-like.
    quantiles = list(quantiles)

    boot_predictions = self.predict_bootstrapping(
        steps=steps,
        last_window=last_window,
        exog=exog,
        n_boot=n_boot,
        random_state=random_state,
        use_in_sample_residuals=use_in_sample_residuals,
        use_binned_residuals=use_binned_residuals,
    )

    # One column per requested quantile, one row per forecast step.
    predictions = boot_predictions.quantile(q=quantiles, axis=1).transpose()
    predictions.columns = [f"q_{q}" for q in quantiles]

    return predictions

set_fit_kwargs(fit_kwargs)

Set new values for the additional keyword arguments passed to the fit method of the estimator.

Parameters:

Name Type Description Default
fit_kwargs dict[str, object]

Dict of the form {"argument": new_value}.

required
Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def set_fit_kwargs(self, fit_kwargs: dict[str, object]) -> None:
    """
    Update the additional keyword arguments that are forwarded to the
    estimator's `fit` method.

    Args:
        fit_kwargs: Dict of the form {"argument": new_value}.
    """
    # Validate and filter the kwargs against the estimator's fit signature
    # before storing them.
    validated_kwargs = check_select_fit_kwargs(self.estimator, fit_kwargs=fit_kwargs)
    self.fit_kwargs = validated_kwargs

set_in_sample_residuals(y, exog=None, random_state=123)

Set in-sample residuals in case they were not calculated during the training process.

In-sample residuals are calculated as the difference between the true values and the predictions made by the forecaster using the training data. The following internal attributes are updated:

  • in_sample_residuals_: residuals stored in a numpy ndarray.
  • binner_intervals_: intervals used to bin the residuals are calculated using the quantiles of the predicted values.
  • in_sample_residuals_by_bin_: residuals are binned according to the predicted value they are associated with and stored in a dictionary, where the keys are the intervals of the predicted values and the values are the residuals associated with that range.

A total of 10_000 residuals are stored in the attribute in_sample_residuals_. If the number of residuals is greater than 10_000, a random sample of 10_000 residuals is stored. The number of residuals stored per bin is limited to 10_000 // self.binner.n_bins_.

Parameters:

Name Type Description Default
y Series

Target time series.

required
exog Series | DataFrame | None

Exogenous variables.

None
random_state int

Random state for reproducibility.

123

Returns:

Type Description
None

None

Raises:

Type Description
NotFittedError

If the forecaster is not fitted.

IndexError

If the index range of y does not match the range used during training.

ValueError

If the features generated from the provided data do not match those used during the training process.

Examples:

>>> from sklearn.linear_model import LinearRegression
>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
>>> forecaster.fit(y=pd.Series(np.arange(20)), store_in_sample_residuals=False)
>>> forecaster.set_in_sample_residuals(y=pd.Series(np.arange(20)))
>>> forecaster.in_sample_residuals_
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def set_in_sample_residuals(
    self,
    y: pd.Series,
    exog: pd.Series | pd.DataFrame | None = None,
    random_state: int = 123,
) -> None:
    """
    Set in-sample residuals in case they were not calculated during the
    training process.

    In-sample residuals are calculated as the difference between the true
    values and the predictions made by the forecaster using the training
    data. The following internal attributes are updated:

    + `in_sample_residuals_`: residuals stored in a numpy ndarray.
    + `binner_intervals_`: intervals used to bin the residuals are calculated
    using the quantiles of the predicted values.
    + `in_sample_residuals_by_bin_`: residuals are binned according to the
    predicted value they are associated with and stored in a dictionary, where
    the keys are the intervals of the predicted values and the values are
    the residuals associated with that range.

    A total of 10_000 residuals are stored in the attribute `in_sample_residuals_`.
    If the number of residuals is greater than 10_000, a random sample of
    10_000 residuals is stored. The number of residuals stored per bin is
    limited to `10_000 // self.binner.n_bins_`.

    Args:
        y: Target time series.
        exog: Exogenous variables.
        random_state: Random state for reproducibility.

    Returns:
        None

    Raises:
        NotFittedError: If the forecaster is not fitted.
        IndexError: If the index range of `y` does not match the range
            used during training.
        ValueError: If the features generated from the provided data do not
            match those used during the training process.

    Examples:
        >>> from sklearn.linear_model import LinearRegression
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
        >>> forecaster.fit(y=pd.Series(np.arange(20)), store_in_sample_residuals=False)
        >>> forecaster.set_in_sample_residuals(y=pd.Series(np.arange(20)))
        >>> forecaster.in_sample_residuals_
        array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
    """
    if not self.is_fitted:
        raise NotFittedError(
            "This forecaster is not fitted yet. Call `fit` with appropriate "
            "arguments before using `set_in_sample_residuals()`."
        )

    check_y(y=y)
    # First and last index values of `y`; these must match the boundaries of
    # the index range stored during training (`self.training_range_`).
    y_index_range = check_extract_values_and_index(
        data=y, data_label="`y`", return_values=False
    )[1][[0, -1]]

    if not y_index_range.equals(self.training_range_):
        raise IndexError(
            f"The index range of `y` does not match the range "
            f"used during training. Please ensure the index is aligned "
            f"with the training data.\n"
            f"    Expected : {self.training_range_}\n"
            f"    Received : {y_index_range}"
        )

    # Rebuild the training matrices from the provided data; only the design
    # matrix, the target and the output feature names are needed here.
    (
        X_train,
        y_train,
        _,
        _,
        _,
        X_train_features_names_out_,
        *_,
    ) = self._create_train_X_y(y=y, exog=exog)

    if not X_train_features_names_out_ == self.X_train_features_names_out_:
        raise ValueError(
            f"Feature mismatch detected after matrix creation. The features "
            f"generated from the provided data do not match those used during "
            f"the training process. To correctly set in-sample residuals, "
            f"ensure that the same data and preprocessing steps are applied.\n"
            f"    Expected output : {self.X_train_features_names_out_}\n"
            f"    Current output  : {X_train_features_names_out_}"
        )

    # Delegate binning and storage of residuals to the shared helper.
    self._binning_in_sample_residuals(
        y_true=y_train.to_numpy(),
        y_pred=self.estimator.predict(X_train).ravel(),
        store_in_sample_residuals=True,
        random_state=random_state,
    )

set_lags(lags=None)

Set new value to the attribute lags. Attributes lags_names, max_lag and window_size are also updated.

Parameters:

Name Type Description Default
lags Union[int, List[int], ndarray, range, None]

Lags used as predictors. Index starts at 1, so lag 1 is equal to t-1. - int: include lags from 1 to lags (included). - list, 1d numpy ndarray or range: include only lags present in lags, all elements must be int. - None: no lags are included as predictors.

None
Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def set_lags(
    self, lags: Union[int, List[int], np.ndarray, range, None] = None
) -> None:
    """
    Replace the forecaster's `lags` attribute with a new value.

    The derived attributes `lags_names`, `max_lag` and `window_size` are
    recomputed accordingly.

    Args:
        lags: Lags used as predictors. Index starts at 1, so lag 1 is equal to t-1.
            - `int`: include lags from 1 to `lags` (included).
            - `list`, `1d numpy ndarray` or `range`: include only lags present in
            `lags`, all elements must be int.
            - `None`: no lags are included as predictors.
    """

    # Without lags or window features there would be no predictors at all.
    if lags is None and self.window_features is None:
        raise ValueError(
            "At least one of the arguments `lags` or `window_features` "
            "must be different from None. This is required to create the "
            "predictors used in training the forecaster."
        )

    lags_info = initialize_lags(type(self).__name__, lags)
    self.lags, self.lags_names, self.max_lag = lags_info

    # Window size is driven by the largest lookback among lags and window features.
    candidate_sizes = (self.max_lag, self.max_size_window_features)
    self.window_size = max(ws for ws in candidate_sizes if ws is not None)

    if self.differentiation is not None:
        # Differencing consumes extra leading observations, so widen the window.
        self.window_size = self.window_size + self.differentiation
        self.differentiator.set_params(window_size=self.window_size)

set_out_sample_residuals(y_true, y_pred, append=False, random_state=123)

Set new values to the attribute out_sample_residuals_.

Out of sample residuals are meant to be calculated using observations that did not participate in the training process. y_true and y_pred are expected to be in the original scale of the time series. Residuals are calculated as y_true - y_pred, after applying the necessary transformations and differentiations if the forecaster includes them (self.transformer_y and self.differentiation). Two internal attributes are updated:

  • out_sample_residuals_: residuals stored in a numpy ndarray.
  • out_sample_residuals_by_bin_: residuals are binned according to the predicted value they are associated with and stored in a dictionary, where the keys are the intervals of the predicted values and the values are the residuals associated with that range. If a bin is empty, it is filled with a random sample of residuals from other bins. This is done to ensure that all bins have at least one residual and can be used in the prediction process.

A total of 10_000 residuals are stored in the attribute out_sample_residuals_. If the number of residuals is greater than 10_000, a random sample of 10_000 residuals is stored. The number of residuals stored per bin is limited to 10_000 // self.binner.n_bins_.

Parameters:

Name Type Description Default
y_true ndarray | Series

True values of the time series in the original scale.

required
y_pred ndarray | Series

Predicted values of the time series in the original scale.

required
append bool

If True, new residuals are added to the ones already stored in the forecaster. If after appending the new residuals, the limit of 10_000 // self.binner.n_bins_ values per bin is reached, a random sample of residuals is stored.

False
random_state int

Random state for reproducibility.

123

Returns:

Type Description
None

None

Raises:

Type Description
NotFittedError

If the forecaster is not fitted.

TypeError

If y_true or y_pred are not numpy ndarray or pandas Series.

ValueError

If y_true and y_pred have different length or index (if Series).

Examples:

>>> from sklearn.linear_model import LinearRegression
>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> import pandas as pd
>>> import numpy as np
>>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
>>> forecaster.fit(y=pd.Series(np.arange(20)), store_in_sample_residuals=False)
>>> y_true = np.array([20, 21, 22, 23, 24])
>>> y_pred = np.array([20.1, 20.9, 22.2, 22.8, 24.0])
>>> forecaster.set_out_sample_residuals(y_true=y_true, y_pred=y_pred)
>>> forecaster.out_sample_residuals_
array([-0.1,  0.1, -0.2,  0.2,  0. ])
Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def set_out_sample_residuals(
    self,
    y_true: np.ndarray | pd.Series,
    y_pred: np.ndarray | pd.Series,
    append: bool = False,
    random_state: int = 123,
) -> None:
    """
    Set new values to the attribute `out_sample_residuals_`.

    Out of sample residuals are meant to be calculated using observations that
    did not participate in the training process. `y_true` and `y_pred` are
    expected to be in the original scale of the time series. Residuals are
    calculated as `y_true` - `y_pred`, after applying the necessary
    transformations and differentiations if the forecaster includes them
    (`self.transformer_y` and `self.differentiation`). Two internal attributes
    are updated:

    + `out_sample_residuals_`: residuals stored in a numpy ndarray.
    + `out_sample_residuals_by_bin_`: residuals are binned according to the
    predicted value they are associated with and stored in a dictionary, where
    the keys are the intervals of the predicted values and the values are
    the residuals associated with that range. If a bin is empty, it is filled
    with a random sample of residuals from other bins. This is done to ensure
    that all bins have at least one residual and can be used in the prediction
    process.

    A total of 10_000 residuals are stored in the attribute `out_sample_residuals_`.
    If the number of residuals is greater than 10_000, a random sample of
    10_000 residuals is stored. The number of residuals stored per bin is
    limited to `10_000 // self.binner.n_bins_`.

    Args:
        y_true: True values of the time series in the original scale.
        y_pred: Predicted values of the time series in the original scale.
        append: If `True`, new residuals are added to the ones already stored
            in the forecaster. If after appending the new residuals, the limit
            of `10_000 // self.binner.n_bins_` values per bin is reached, a
            random sample of residuals is stored.
        random_state: Random state for reproducibility.

    Returns:
        None

    Raises:
        NotFittedError: If the forecaster is not fitted.
        TypeError: If `y_true` or `y_pred` are not `numpy ndarray` or `pandas Series`.
        ValueError: If `y_true` and `y_pred` have different length or index (if Series).

    Examples:
        >>> from sklearn.linear_model import LinearRegression
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> import pandas as pd
        >>> import numpy as np
        >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
        >>> forecaster.fit(y=pd.Series(np.arange(20)), store_in_sample_residuals=False)
        >>> y_true = np.array([20, 21, 22, 23, 24])
        >>> y_pred = np.array([20.1, 20.9, 22.2, 22.8, 24.0])
        >>> forecaster.set_out_sample_residuals(y_true=y_true, y_pred=y_pred)
        >>> forecaster.out_sample_residuals_
        array([-0.1,  0.1, -0.2,  0.2,  0. ])
    """
    if not self.is_fitted:
        raise NotFittedError(
            "This forecaster is not fitted yet. Call `fit` with appropriate "
            "arguments before using `set_out_sample_residuals()`."
        )

    if not isinstance(y_true, (np.ndarray, pd.Series)):
        raise TypeError(
            f"`y_true` argument must be `numpy ndarray` or `pandas Series`. "
            f"Got {type(y_true)}."
        )

    if not isinstance(y_pred, (np.ndarray, pd.Series)):
        raise TypeError(
            f"`y_pred` argument must be `numpy ndarray` or `pandas Series`. "
            f"Got {type(y_pred)}."
        )

    if len(y_true) != len(y_pred):
        raise ValueError(
            f"`y_true` and `y_pred` must have the same length. "
            f"Got {len(y_true)} and {len(y_pred)}."
        )

    if isinstance(y_true, pd.Series) and isinstance(y_pred, pd.Series):
        if not y_true.index.equals(y_pred.index):
            raise ValueError("`y_true` and `y_pred` must have the same index.")

    # Work on plain numpy arrays from here on.
    if not isinstance(y_pred, np.ndarray):
        y_pred = y_pred.to_numpy()
    if not isinstance(y_true, np.ndarray):
        y_true = y_true.to_numpy()

    # Map both arrays into the transformed scale used internally during training.
    if self.transformer_y:
        y_true = transform_numpy(
            array=y_true,
            transformer=self.transformer_y,
            fit=False,
            inverse_transform=False,
        )
        y_pred = transform_numpy(
            array=y_pred,
            transformer=self.transformer_y,
            fit=False,
            inverse_transform=False,
        )

    # Differentiate on a copy so the fitted differentiator's state is untouched;
    # the first `self.differentiation` values are undefined and dropped.
    if self.differentiation is not None:
        differentiator = copy(self.differentiator)
        differentiator.set_params(window_size=None)
        y_true = differentiator.fit_transform(y_true)[self.differentiation :]
        y_pred = differentiator.fit_transform(y_pred)[self.differentiation :]

    # Pair predictions with residuals and drop rows where either is NaN.
    data = pd.DataFrame(
        {"prediction": y_pred, "residuals": y_true - y_pred}
    ).dropna()
    y_pred = data["prediction"].to_numpy()
    residuals = data["residuals"].to_numpy()

    # Group residuals by the bin of their associated predicted value.
    if self.binner is not None:
        data["bin"] = self.binner.transform(y_pred).astype(int)
        residuals_by_bin = (
            data.groupby("bin")["residuals"].apply(np.array).to_dict()
        )
    else:
        residuals_by_bin = {}

    out_sample_residuals = (
        np.array([])
        if self.out_sample_residuals_ is None
        else self.out_sample_residuals_
    )
    out_sample_residuals_by_bin = (
        {}
        if self.out_sample_residuals_by_bin_ is None
        else self.out_sample_residuals_by_bin_
    )
    if append:
        # Merge new residuals with the stored ones, per bin and overall.
        # NOTE(review): the overall array is not capped at 10_000 here even
        # though the docstring says it is — confirm whether that cap is
        # enforced elsewhere.
        out_sample_residuals = np.concatenate([out_sample_residuals, residuals])
        for k, v in residuals_by_bin.items():
            if k in out_sample_residuals_by_bin:
                out_sample_residuals_by_bin[k] = np.concatenate(
                    (out_sample_residuals_by_bin[k], v)
                )
            else:
                out_sample_residuals_by_bin[k] = v
    else:
        out_sample_residuals = residuals
        out_sample_residuals_by_bin = residuals_by_bin

    if self.binner is not None:
        # NOTE(review): docstring refers to `self.binner.n_bins_` but the code
        # reads `self.binner.n_bins` — confirm the attribute name is intended.
        max_samples = 10_000 // self.binner.n_bins
        rng = np.random.default_rng(seed=random_state)

        # Downsample any bin that exceeds the per-bin budget.
        for k, v in out_sample_residuals_by_bin.items():
            if len(v) > max_samples:
                out_sample_residuals_by_bin[k] = rng.choice(
                    v, size=max_samples, replace=False
                )

        bin_keys = (
            [] if self.binner_intervals_ is None else self.binner_intervals_.keys()
        )
        empty_bins = [
            k
            for k in bin_keys
            if k not in out_sample_residuals_by_bin
            or len(out_sample_residuals_by_bin[k]) == 0
        ]

        # Fill empty bins with a random sample of the pooled residuals so
        # every bin can be used during prediction.
        if empty_bins:
            warnings.warn(
                f"The following bins have no out of sample residuals: {empty_bins}. "
                f"No predicted values fall in the interval "
                f"{[self.binner_intervals_[bin] for bin in empty_bins]}. "
                f"Empty bins will be filled with a random sample of residuals.",
                ResidualsUsageWarning,
            )
            empty_bin_size = min(max_samples, len(out_sample_residuals))
            for k in empty_bins:
                out_sample_residuals_by_bin[k] = rng.choice(
                    a=out_sample_residuals, size=empty_bin_size, replace=False
                )

    self.out_sample_residuals_ = out_sample_residuals
    self.out_sample_residuals_by_bin_ = out_sample_residuals_by_bin

set_params(params=None, **kwargs)

Set the parameters of this forecaster.

Parameters:

Name Type Description Default
params Dict[str, object]

Optional dictionary of parameter names mapped to their new values. If provided, these parameters are set first.

None
**kwargs object

Dictionary of parameter names mapped to their new values. Parameters can be for the forecaster itself or for the contained estimator (using the estimator__ prefix).

{}

Returns:

Name Type Description
self 'ForecasterRecursive'

The forecaster instance with updated parameters.

Examples:

>>> from sklearn.linear_model import LinearRegression
>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
>>> forecaster.set_params(estimator__fit_intercept=False)
>>> forecaster.estimator.get_params()["fit_intercept"]
False
Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def set_params(
    self, params: Union[Dict[str, object], None] = None, **kwargs: object
) -> "ForecasterRecursive":
    """
    Set the parameters of this forecaster.

    Args:
        params: Optional dictionary of parameter names mapped to their new values.
            If provided, these parameters are set first.
        **kwargs: Dictionary of parameter names mapped to their new values.
            Parameters can be for the forecaster itself or for the contained
            estimator (using the `estimator__` prefix).

    Returns:
        self: The forecaster instance with updated parameters.

    Examples:
        >>> from sklearn.linear_model import LinearRegression
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
        >>> forecaster.set_params(estimator__fit_intercept=False)
        >>> forecaster.estimator.get_params()["fit_intercept"]
        False
    """

    # Merge the optional `params` dict with keyword arguments; kwargs win on
    # duplicate keys because they are applied last.
    all_params: Dict[str, object] = {}
    if params is not None:
        all_params.update(params)
    all_params.update(kwargs)

    if not all_params:
        return self

    # Keys with a `__` separator address a nested object (e.g. `estimator__alpha`);
    # plain keys are set directly on the forecaster.
    nested_params: Dict[str, Dict[str, object]] = {}
    for key, value in all_params.items():
        if "__" in key:
            obj_name, param_name = key.split("__", 1)
            nested_params.setdefault(obj_name, {})[param_name] = value
        else:
            setattr(self, key, value)

    # Forward nested parameters: prefer the object's own `set_params`, fall
    # back to plain attribute assignment. Unknown object names are ignored.
    for obj_name, obj_params in nested_params.items():
        if hasattr(self, obj_name):
            obj = getattr(self, obj_name)
            if hasattr(obj, "set_params"):
                obj.set_params(**obj_params)
            else:
                for param_name, value in obj_params.items():
                    setattr(obj, param_name, value)

    return self

set_window_features(window_features=None)

Set new value to the attribute window_features.

Attributes max_size_window_features, window_features_names, window_features_class_names and window_size are also updated.

Parameters:

Name Type Description Default
window_features object | list[object] | None

Instance or list of instances used to create window features. Window features are created from the original time series and are included as predictors.

None

Returns:

Type Description
None

None

Examples:

>>> from sklearn.linear_model import LinearRegression
>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> from spotforecast2_safe.preprocessing import RollingFeatures
>>> import pandas as pd
>>> import numpy as np
>>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
>>> rolling = RollingFeatures(stats=['mean', 'std'], window_sizes=[3, 5])
>>> forecaster.set_window_features(window_features=rolling)
>>> forecaster.window_features_names
['roll_mean_3', 'roll_std_3', 'roll_mean_5', 'roll_std_5']
>>> forecaster.window_size
5
Source code in src/spotforecast2_safe/forecaster/recursive/_forecaster_recursive.py
def set_window_features(
    self, window_features: object | list[object] | None = None
) -> None:
    """
    Replace the forecaster's `window_features` attribute with a new value.

    The derived attributes `max_size_window_features`, `window_features_names`,
    `window_features_class_names` and `window_size` are recomputed accordingly.

    Args:
        window_features: Instance or list of instances used to create window features.
            Window features are created from the original time series and are
            included as predictors.

    Returns:
        None

    Examples:
        >>> from sklearn.linear_model import LinearRegression
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> from spotforecast2_safe.preprocessing import RollingFeatures
        >>> forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=3)
        >>> rolling = RollingFeatures(stats=['mean', 'std'], window_sizes=[3, 5])
        >>> forecaster.set_window_features(window_features=rolling)
        >>> forecaster.window_features_names
        ['roll_mean_3', 'roll_std_3', 'roll_mean_5', 'roll_std_5']
        >>> forecaster.window_size
        5
    """

    # Without lags or window features there would be no predictors at all.
    if window_features is None and self.lags is None:
        raise ValueError(
            "At least one of the arguments `lags` or `window_features` "
            "must be different from None. This is required to create the "
            "predictors used in training the forecaster."
        )

    wf_init = initialize_window_features(window_features)
    self.window_features = wf_init[0]
    self.window_features_names = wf_init[1]
    self.max_size_window_features = wf_init[2]

    if window_features is None:
        self.window_features_class_names = None
    else:
        self.window_features_class_names = [
            type(wf).__name__ for wf in self.window_features
        ]

    # Window size is driven by the largest lookback among lags and window features.
    candidate_sizes = (self.max_lag, self.max_size_window_features)
    self.window_size = max(ws for ws in candidate_sizes if ws is not None)

    if self.differentiation is not None:
        # Differencing consumes extra leading observations, so widen the window.
        self.window_size = self.window_size + self.differentiation
        self.differentiator.set_params(window_size=self.window_size)

Forecasting Utilities

utils

spotforecast2_safe.forecaster.utils

check_exog(exog, allow_nan=True, series_id='`exog`')

Validate that exog is a pandas Series or DataFrame.

This function ensures that exogenous variables meet basic requirements: - Must be a pandas Series or DataFrame - If Series, must have a name - Optionally warns if NaN values are present

Parameters:

Name Type Description Default
exog Union[Series, DataFrame]

Exogenous variable/s included as predictor/s.

required
allow_nan bool

If True, allows NaN values but issues a warning. If False, raises no warning about NaN values. Defaults to True.

True
series_id str

Identifier of the series used in error messages. Defaults to "exog".

'`exog`'

Raises:

Type Description
TypeError

If exog is not a pandas Series or DataFrame.

ValueError

If exog is a Series without a name.

Warns:

Type Description
MissingValuesWarning

If allow_nan=True and exog contains NaN values.

Examples:

>>> import pandas as pd
>>> import numpy as np
>>> from spotforecast2_safe.utils.validation import check_exog
>>>
>>> # Valid DataFrame
>>> exog_df = pd.DataFrame({"temp": [20, 21, 22], "humidity": [50, 55, 60]})
>>> check_exog(exog_df)  # No error
>>>
>>> # Valid Series with name
>>> exog_series = pd.Series([1, 2, 3], name="temperature")
>>> check_exog(exog_series)  # No error
>>>
>>> # Invalid: Series without name
>>> exog_no_name = pd.Series([1, 2, 3])
>>> try:
...     check_exog(exog_no_name)
... except ValueError as e:
...     print(f"Error: {e}")
Error: When `exog` is a pandas Series, it must have a name.
>>>
>>> # Invalid: not a Series/DataFrame
>>> try:
...     check_exog([1, 2, 3])
... except TypeError as e:
...     print(f"Error: {e}")
Error: `exog` must be a pandas Series or DataFrame. Got <class 'list'>.
Source code in src/spotforecast2_safe/utils/validation.py
def check_exog(
    exog: Union[pd.Series, pd.DataFrame],
    allow_nan: bool = True,
    series_id: str = "`exog`",
) -> None:
    """
    Validate that exog is a pandas Series or DataFrame.

    This function ensures that exogenous variables meet basic requirements:
    - Must be a pandas Series or DataFrame
    - If Series, must have a name
    - Optionally warns if NaN values are present

    Args:
        exog: Exogenous variable/s included as predictor/s.
        allow_nan: If True, NaN values are accepted silently. If False, a
            MissingValuesWarning is issued when NaN values are present.
            Defaults to True.
        series_id: Identifier of the series used in error messages. Defaults to "`exog`".

    Raises:
        TypeError: If exog is not a pandas Series or DataFrame.
        ValueError: If exog is a Series without a name.

    Warnings:
        MissingValuesWarning: If allow_nan=False and exog contains NaN values.

    Examples:
        >>> import pandas as pd
        >>> import numpy as np
        >>> from spotforecast2_safe.utils.validation import check_exog
        >>>
        >>> # Valid DataFrame
        >>> exog_df = pd.DataFrame({"temp": [20, 21, 22], "humidity": [50, 55, 60]})
        >>> check_exog(exog_df)  # No error
        >>>
        >>> # Valid Series with name
        >>> exog_series = pd.Series([1, 2, 3], name="temperature")
        >>> check_exog(exog_series)  # No error
        >>>
        >>> # Invalid: Series without name
        >>> exog_no_name = pd.Series([1, 2, 3])
        >>> try:
        ...     check_exog(exog_no_name)
        ... except ValueError as e:
        ...     print(f"Error: {e}")
        Error: When `exog` is a pandas Series, it must have a name.
        >>>
        >>> # Invalid: not a Series/DataFrame
        >>> try:
        ...     check_exog([1, 2, 3])
        ... except TypeError as e:
        ...     print(f"Error: {e}")
        Error: `exog` must be a pandas Series or DataFrame. Got <class 'list'>.
    """
    if not isinstance(exog, (pd.Series, pd.DataFrame)):
        raise TypeError(
            f"{series_id} must be a pandas Series or DataFrame. Got {type(exog)}."
        )

    # A Series needs a name so the column can be identified after it is
    # converted to a DataFrame downstream.
    if isinstance(exog, pd.Series) and exog.name is None:
        raise ValueError(f"When {series_id} is a pandas Series, it must have a name.")

    # NaN screening is opt-in via allow_nan=False; strict callers (e.g. the
    # predict-input checks) request the warning before fitting/predicting.
    if not allow_nan:
        if exog.isna().to_numpy().any():
            warnings.warn(
                f"{series_id} has missing values. Most machine learning models "
                f"do not allow missing values. Fitting the forecaster may fail.",
                MissingValuesWarning,
            )

    return

check_exog_dtypes(exog, call_check_exog=True, series_id='`exog`')

Check that exogenous variables have valid data types (int, float, category).

This function validates that the exogenous variables (Series or DataFrame) contain only supported data types: integer, float, or category. It issues a warning if other types (like object/string) are found, as these may cause issues with some machine learning estimators.

It also strictly enforces that categorical columns must have integer categories.

Parameters:

Name Type Description Default
exog Union[Series, DataFrame]

Exogenous variables to check.

required
call_check_exog bool

If True, calls check_exog() first to ensure basic validity. Defaults to True.

True
series_id str

Identifier used in warning/error messages. Defaults to "exog".

'`exog`'

Raises:

Type Description
TypeError

If categorical columns contain non-integer categories.

Warns:

Type Description
DataTypeWarning

If columns with unsupported data types (not int, float, category) are found.

Examples:

>>> import pandas as pd
>>> import numpy as np
>>> from spotforecast2_safe.utils.validation import check_exog_dtypes
>>>
>>> # Valid types (float, int)
>>> df_valid = pd.DataFrame({
...     "a": [1.0, 2.0, 3.0],
...     "b": [1, 2, 3]
... })
>>> check_exog_dtypes(df_valid)  # No warning
>>>
>>> # Invalid type (object/string)
>>> df_invalid = pd.DataFrame({
...     "a": [1, 2, 3],
...     "b": ["x", "y", "z"]
... })
>>> check_exog_dtypes(df_invalid)
... # Issues DataTypeWarning about column 'b'
>>>
>>> # Valid categorical (with integer categories)
>>> df_cat = pd.DataFrame({"a": [1, 2, 1]})
>>> df_cat["a"] = df_cat["a"].astype("category")
>>> check_exog_dtypes(df_cat)  # No warning
Source code in src/spotforecast2_safe/utils/validation.py
def check_exog_dtypes(
    exog: Union[pd.Series, pd.DataFrame],
    call_check_exog: bool = True,
    series_id: str = "`exog`",
) -> None:
    """
    Check that exogenous variables have valid data types (int, float, category).

    Only integer, float and categorical dtypes are considered safe for the
    underlying estimators. Columns of any other dtype (e.g. object/string)
    trigger a DataTypeWarning. Categorical columns are additionally required
    to hold integer categories; otherwise a TypeError is raised.

    Args:
        exog: Exogenous variables to check.
        call_check_exog: If True, run check_exog() first (strict NaN check).
            Defaults to True.
        series_id: Identifier used in warning/error messages. Defaults to "`exog`".

    Raises:
        TypeError: If categorical columns contain non-integer categories.

    Warnings:
        DataTypeWarning: If columns with unsupported data types (not int,
            float, category) are found.

    Examples:
        >>> import pandas as pd
        >>> df_valid = pd.DataFrame({"a": [1.0, 2.0, 3.0], "b": [1, 2, 3]})
        >>> check_exog_dtypes(df_valid)  # No warning
        >>> df_cat = pd.DataFrame({"a": [1, 2, 1]})
        >>> df_cat["a"] = df_cat["a"].astype("category")
        >>> check_exog_dtypes(df_cat)  # No warning
    """
    if call_check_exog:
        check_exog(exog=exog, allow_nan=False, series_id=series_id)

    # Prefix match covers numpy and pandas nullable numeric dtype names
    # (int64, Int64, float32, Float64, uint8, ...).
    valid_dtypes = ("int", "Int", "float", "Float", "uint")

    if isinstance(exog, pd.DataFrame):
        found_invalid = False
        for col_dtype in set(exog.dtypes):
            if isinstance(col_dtype, pd.CategoricalDtype):
                try:
                    integer_categories = np.issubdtype(
                        col_dtype.categories.dtype, np.integer
                    )
                except TypeError:
                    # Pandas StringDtype and other non-numpy dtypes will raise TypeError
                    integer_categories = False

                if not integer_categories:
                    raise TypeError(
                        "Categorical dtypes in exog must contain only integer values. "
                    )
            else:
                found_invalid = found_invalid or not col_dtype.name.startswith(
                    valid_dtypes
                )

        if found_invalid:
            warnings.warn(
                f"{series_id} may contain only `int`, `float` or `category` dtypes. "
                f"Most machine learning models do not allow other types of values. "
                f"Fitting the forecaster may fail.",
                DataTypeWarning,
            )

    else:
        # Series branch: a single dtype to inspect.
        dtype_label = str(exog.dtypes)
        if dtype_label != "category" and not dtype_label.startswith(valid_dtypes):
            warnings.warn(
                f"{series_id} may contain only `int`, `float` or `category` dtypes. Most "
                f"machine learning models do not allow other types of values. "
                f"Fitting the forecaster may fail.",
                DataTypeWarning,
            )

        if isinstance(exog.dtype, pd.CategoricalDtype) and not np.issubdtype(
            exog.cat.categories.dtype, np.integer
        ):
            raise TypeError(
                "Categorical dtypes in exog must contain only integer values. "
            )
    return

check_extract_values_and_index(data, data_label='`y`', ignore_freq=False, return_values=True)

Extract values and index from a pandas Series or DataFrame, ensuring they are valid.

Validates that the input data has a proper DatetimeIndex or RangeIndex and extracts its values and index for use in forecasting operations. Optionally checks for index frequency consistency.

Parameters:

Name Type Description Default
data Union[Series, DataFrame]

Input data (pandas Series or DataFrame) to extract values and index from.

required
data_label str

Label used in exception messages for better error reporting. Defaults to "y".

'`y`'
ignore_freq bool

If True, the frequency of the index is not checked. Defaults to False.

False
return_values bool

If True, the values of the data are returned. Defaults to True.

True

Returns:

Name Type Description
tuple Tuple[Optional[ndarray], Index]

A tuple containing: - values (numpy.ndarray or None): Values of the data as numpy array, or None if return_values is False. - index (pandas.Index): Index of the data.

Raises:

Type Description
TypeError

If data is not a pandas Series or DataFrame.

TypeError

If data index is not a DatetimeIndex or RangeIndex.

Warns:

Type Description
UserWarning

If DatetimeIndex has no frequency (inferred automatically).

Examples:

>>> import pandas as pd
>>> import numpy as np
>>> dates = pd.date_range('2020-01-01', periods=10, freq='D')
>>> series = pd.Series(np.arange(10), index=dates)
>>> values, index = check_extract_values_and_index(series)
>>> print(values.shape)
(10,)
>>> print(type(index))
<class 'pandas.core.indexes.datetimes.DatetimeIndex'>

Extract index only:

>>> _, index = check_extract_values_and_index(series, return_values=False)
>>> print(index[0])
2020-01-01 00:00:00
Source code in src/spotforecast2_safe/forecaster/utils.py
def check_extract_values_and_index(
    data: Union[pd.Series, pd.DataFrame],
    data_label: str = "`y`",
    ignore_freq: bool = False,
    return_values: bool = True,
) -> Tuple[Optional[np.ndarray], pd.Index]:
    """Extract values and index from a pandas Series or DataFrame, ensuring they are valid.

    The input must be a pandas Series or DataFrame whose index is either a
    DatetimeIndex or a RangeIndex. A UserWarning is emitted when a
    DatetimeIndex carries no frequency (unless `ignore_freq` is set).

    Args:
        data: Input data (pandas Series or DataFrame) to extract values and
            index from.
        data_label: Label used in exception messages for better error
            reporting. Defaults to "`y`".
        ignore_freq: If True, the frequency of the index is not checked.
            Defaults to False.
        return_values: If True, the values of the data are returned.
            Defaults to True.

    Returns:
        tuple: `(values, index)` where `values` is a numpy array of the data
        (or None when `return_values` is False) and `index` is the pandas
        index of the data.

    Raises:
        TypeError: If data is not a pandas Series or DataFrame.
        TypeError: If data index is not a DatetimeIndex or RangeIndex.

    Warnings:
        UserWarning: If DatetimeIndex has no frequency.

    Examples:
        >>> import pandas as pd
        >>> import numpy as np
        >>> dates = pd.date_range('2020-01-01', periods=10, freq='D')
        >>> series = pd.Series(np.arange(10), index=dates)
        >>> values, index = check_extract_values_and_index(series)
        >>> values.shape
        (10,)
        >>> _, index = check_extract_values_and_index(series, return_values=False)
        >>> print(index[0])
        2020-01-01 00:00:00
    """
    if not isinstance(data, (pd.Series, pd.DataFrame)):
        raise TypeError(f"{data_label} must be a pandas Series or DataFrame.")

    idx = data.index
    if not isinstance(idx, (pd.DatetimeIndex, pd.RangeIndex)):
        raise TypeError(f"{data_label} must have a pandas DatetimeIndex or RangeIndex.")

    # A DatetimeIndex without an explicit freq is tolerated but reported.
    if not ignore_freq and isinstance(idx, pd.DatetimeIndex) and idx.freq is None:
        warnings.warn(
            f"{data_label} has a DatetimeIndex but no frequency. "
            "The frequency has been inferred from the index.",
            UserWarning,
        )

    if return_values:
        return data.to_numpy(), idx

    return None, idx

check_interval(interval=None, ensure_symmetric_intervals=False, quantiles=None, alpha=None, alpha_literal='alpha')

Validate that a confidence interval specification is valid.

This function checks that interval values are properly formatted and within valid ranges for confidence interval prediction.

Parameters:

Name Type Description Default
interval Union[List[float], Tuple[float], None]

Confidence interval percentiles (0-100 inclusive). Should be [lower_bound, upper_bound]. Example: [2.5, 97.5] for 95% interval.

None
ensure_symmetric_intervals bool

If True, ensure intervals are symmetric (lower + upper = 100).

False
quantiles Union[List[float], Tuple[float], None]

Sequence of quantiles (0-1 inclusive). Currently not validated, reserved for future use.

None
alpha Optional[float]

Confidence level (1-alpha). Currently not validated, reserved for future use.

None
alpha_literal Optional[str]

Name used in error messages for alpha parameter.

'alpha'

Raises:

Type Description
TypeError

If interval is not a list or tuple.

ValueError

If interval doesn't have exactly 2 values, values out of range (0-100), lower >= upper, or intervals not symmetric when required.

Examples:

>>> from spotforecast2_safe.utils.validation import check_interval
>>>
>>> # Valid 95% confidence interval
>>> check_interval(interval=[2.5, 97.5])  # No error
>>>
>>> # Valid symmetric interval
>>> check_interval(interval=[2.5, 97.5], ensure_symmetric_intervals=True)  # No error
>>>
>>> # Invalid: not symmetric
>>> try:
...     check_interval(interval=[5, 90], ensure_symmetric_intervals=True)
... except ValueError as e:
...     print("Error: Interval not symmetric")
Error: Interval not symmetric
>>>
>>> # Invalid: wrong number of values
>>> try:
...     check_interval(interval=[2.5, 50, 97.5])
... except ValueError as e:
...     print("Error: Must have exactly 2 values")
Error: Must have exactly 2 values
>>>
>>> # Invalid: out of range
>>> try:
...     check_interval(interval=[-5, 105])
... except ValueError as e:
...     print("Error: Values out of range")
Error: Values out of range
Source code in src/spotforecast2_safe/utils/validation.py
def check_interval(
    interval: Union[List[float], Tuple[float], None] = None,
    ensure_symmetric_intervals: bool = False,
    quantiles: Union[List[float], Tuple[float], None] = None,
    alpha: Optional[float] = None,
    alpha_literal: Optional[str] = "alpha",
) -> None:
    """
    Validate that a confidence interval specification is valid.

    Checks that `interval` is a 2-element list/tuple of percentiles within
    [0, 100], with the lower bound strictly below the upper bound, and —
    when requested — that the two bounds are symmetric (sum to 100).

    Args:
        interval: Confidence interval percentiles (0-100 inclusive) as
            [lower_bound, upper_bound], e.g. [2.5, 97.5] for a 95% interval.
        ensure_symmetric_intervals: If True, ensure intervals are symmetric
            (lower + upper = 100).
        quantiles: Sequence of quantiles (0-1 inclusive). Currently not
            validated, reserved for future use.
        alpha: Confidence level (1-alpha). Currently not validated, reserved
            for future use.
        alpha_literal: Name used in error messages for alpha parameter.

    Raises:
        TypeError: If interval is not a list or tuple.
        ValueError: If interval doesn't have exactly 2 values, values out of
            range (0-100), lower >= upper, or intervals not symmetric when
            required.

    Examples:
        >>> check_interval(interval=[2.5, 97.5])  # No error
        >>> check_interval(interval=[2.5, 97.5], ensure_symmetric_intervals=True)
        >>> try:
        ...     check_interval(interval=[5, 90], ensure_symmetric_intervals=True)
        ... except ValueError:
        ...     print("Error: Interval not symmetric")
        Error: Interval not symmetric
    """
    # Nothing to validate when no interval was requested.
    if interval is None:
        return

    if not isinstance(interval, (list, tuple)):
        raise TypeError(
            "`interval` must be a `list` or `tuple`. For example, interval of 95% "
            "should be as `interval = [2.5, 97.5]`."
        )

    if len(interval) != 2:
        raise ValueError(
            "`interval` must contain exactly 2 values, respectively the "
            "lower and upper interval bounds. For example, interval of 95% "
            "should be as `interval = [2.5, 97.5]`."
        )

    lower, upper = interval[0], interval[1]

    if lower < 0.0 or lower >= 100.0:
        raise ValueError(
            f"Lower interval bound ({lower}) must be >= 0 and < 100."
        )

    if upper <= 0.0 or upper > 100.0:
        raise ValueError(
            f"Upper interval bound ({upper}) must be > 0 and <= 100."
        )

    if lower >= upper:
        raise ValueError(
            f"Lower interval bound ({lower}) must be less than the "
            f"upper interval bound ({upper})."
        )

    if ensure_symmetric_intervals and lower + upper != 100:
        raise ValueError(
            f"Interval must be symmetric, the sum of the lower, ({lower}), "
            f"and upper, ({upper}), interval bounds must be equal to "
            f"100. Got {lower + upper}."
        )

    return

check_predict_input(forecaster_name, steps, is_fitted, exog_in_, index_type_, index_freq_, window_size, last_window, last_window_exog=None, exog=None, exog_names_in_=None, interval=None, alpha=None, max_step=None, levels=None, levels_forecaster=None, series_names_in_=None, encoding=None)

Check all inputs of predict method. This is a helper function to validate that inputs used in predict method match attributes of a forecaster already trained.

Parameters:

Name Type Description Default
forecaster_name str

str Forecaster name.

required
steps Union[int, List[int]]

int, list Number of future steps predicted.

required
is_fitted bool

bool Tag to identify if the estimator has been fitted (trained).

required
exog_in_ bool

bool If the forecaster has been trained using exogenous variable/s.

required
index_type_ type

type Type of index of the input used in training.

required
index_freq_ str

str Frequency of Index of the input used in training.

required
window_size int

int Size of the window needed to create the predictors. It is equal to max_lag.

required
last_window Optional[Union[Series, DataFrame]]

pandas Series, pandas DataFrame, None Values of the series used to create the predictors (lags) need in the first iteration of prediction (t + 1).

required
last_window_exog Optional[Union[Series, DataFrame]]

pandas Series, pandas DataFrame, default None Values of the exogenous variables aligned with last_window in ForecasterStats predictions.

None
exog Optional[Union[Series, DataFrame, Dict[str, Union[Series, DataFrame]]]]

pandas Series, pandas DataFrame, dict, default None Exogenous variable/s included as predictor/s.

None
exog_names_in_ Optional[List[str]]

list, default None Names of the exogenous variables used during training.

None
interval Optional[List[float]]

list, tuple, default None Confidence of the prediction interval estimated. Sequence of percentiles to compute, which must be between 0 and 100 inclusive. For example, interval of 95% should be as interval = [2.5, 97.5].

None
alpha Optional[float]

float, default None The confidence intervals used in ForecasterStats are (1 - alpha) %.

None
max_step Optional[int]

int, default None Maximum number of steps allowed (ForecasterDirect and ForecasterDirectMultiVariate).

None
levels Optional[Union[str, List[str]]]

str, list, default None Time series to be predicted (ForecasterRecursiveMultiSeries and `ForecasterRnn).

None
levels_forecaster Optional[Union[str, List[str]]]

str, list, default None Time series used as output data of a multiseries problem in a RNN problem (ForecasterRnn).

None
series_names_in_ Optional[List[str]]

list, default None Names of the columns used during fit (ForecasterRecursiveMultiSeries, ForecasterDirectMultiVariate and ForecasterRnn).

None
encoding Optional[str]

str, default None Encoding used to identify the different series (ForecasterRecursiveMultiSeries).

None

Returns:

Type Description
None

None

Source code in src/spotforecast2_safe/utils/validation.py
def check_predict_input(
    forecaster_name: str,
    steps: Union[int, List[int]],
    is_fitted: bool,
    exog_in_: bool,
    index_type_: type,
    index_freq_: str,
    window_size: int,
    last_window: Optional[Union[pd.Series, pd.DataFrame]],
    last_window_exog: Optional[Union[pd.Series, pd.DataFrame]] = None,
    exog: Optional[
        Union[pd.Series, pd.DataFrame, Dict[str, Union[pd.Series, pd.DataFrame]]]
    ] = None,
    exog_names_in_: Optional[List[str]] = None,
    interval: Optional[List[float]] = None,
    alpha: Optional[float] = None,
    max_step: Optional[int] = None,
    levels: Optional[Union[str, List[str]]] = None,
    levels_forecaster: Optional[Union[str, List[str]]] = None,
    series_names_in_: Optional[List[str]] = None,
    encoding: Optional[str] = None,
) -> None:
    """
    Check all inputs of predict method. This is a helper function to validate
    that inputs used in predict method match attributes of a forecaster already
    trained.

    Args:
        forecaster_name: str
            Forecaster name.
        steps: int, list
            Number of future steps predicted.
        is_fitted: bool
            Tag to identify if the estimator has been fitted (trained).
        exog_in_: bool
            If the forecaster has been trained using exogenous variable/s.
        index_type_: type
            Type of index of the input used in training.
        index_freq_: str
            Frequency of Index of the input used in training.
        window_size: int
            Size of the window needed to create the predictors. It is equal to
            `max_lag`.
        last_window: pandas Series, pandas DataFrame, None
            Values of the series used to create the predictors (lags) need in the
            first iteration of prediction (t + 1).
        last_window_exog: pandas Series, pandas DataFrame, default None
            Values of the exogenous variables aligned with `last_window` in
            ForecasterStats predictions.
        exog: pandas Series, pandas DataFrame, dict, default None
            Exogenous variable/s included as predictor/s.
        exog_names_in_: list, default None
            Names of the exogenous variables used during training.
        interval: list, tuple, default None
            Confidence of the prediction interval estimated. Sequence of percentiles
            to compute, which must be between 0 and 100 inclusive. For example,
            interval of 95% should be as `interval = [2.5, 97.5]`.
        alpha: float, default None
            The confidence intervals used in ForecasterStats are (1 - alpha) %.
        max_step: int, default None
            Maximum number of steps allowed (`ForecasterDirect` and
            `ForecasterDirectMultiVariate`).
        levels: str, list, default None
            Time series to be predicted (`ForecasterRecursiveMultiSeries`
            and `ForecasterRnn`).
        levels_forecaster: str, list, default None
            Time series used as output data of a multiseries problem in a RNN problem
            (`ForecasterRnn`).
        series_names_in_: list, default None
            Names of the columns used during fit (`ForecasterRecursiveMultiSeries`,
            `ForecasterDirectMultiVariate` and `ForecasterRnn`).
        encoding: str, default None
            Encoding used to identify the different series (`ForecasterRecursiveMultiSeries`).

    Raises:
        RuntimeError: If the forecaster is not fitted.
        ValueError: If `steps` is < 1 or exceeds `max_step`, if `exog` is
            inconsistent with how the forecaster was trained, if expected
            exogenous columns are missing, or if `last_window` (DataFrame)
            has missing values.

    Note:
        `forecaster_name`, `index_type_`, `index_freq_`, `window_size`,
        `last_window_exog`, `levels_forecaster` and `encoding` are accepted
        for interface compatibility but are not validated in this function.

    Returns:
        None
    """

    # Fail fast: predicting with an unfitted forecaster is always an error.
    if not is_fitted:
        raise RuntimeError(
            "This forecaster is not fitted yet. Call `fit` with appropriate "
            "arguments before using `predict`."
        )

    # `steps` may be a single integer or an explicit list of step numbers;
    # both forms must only contain values >= 1.
    if isinstance(steps, (int, np.integer)) and steps < 1:
        raise ValueError(
            f"`steps` must be an integer greater than or equal to 1. Got {steps}."
        )

    if isinstance(steps, list) and min(steps) < 1:
        raise ValueError(
            f"`steps` must be a list of integers greater than or equal to 1. Got {steps}."
        )

    # Direct forecasters can only predict up to the horizon they were
    # trained for (`max_step`).
    if max_step is not None:
        if isinstance(steps, (int, np.integer)):
            if steps > max_step:
                raise ValueError(
                    f"The maximum step that can be predicted is {max_step}. "
                    f"Got {steps}."
                )
        elif isinstance(steps, list):
            if max(steps) > max_step:
                raise ValueError(
                    f"The maximum step that can be predicted is {max_step}. "
                    f"Got {max(steps)}."
                )

    # Delegate interval/alpha validation to the shared helper.
    if interval is not None or alpha is not None:
        check_interval(interval=interval, alpha=alpha)

    # Exogenous variables must be provided iff the forecaster was trained
    # with them.
    if exog_in_ and exog is None:
        raise ValueError(
            "Forecaster trained with exogenous variable/s. "
            "Same variable/s must be provided when predicting."
        )

    if not exog_in_ and exog is not None:
        raise ValueError(
            "Forecaster trained without exogenous variable/s. "
            "`exog` must be `None` when predicting."
        )

    if exog is not None:
        # If exog is a dictionary, it is assumed that it contains the exogenous
        # variables for each series.
        if isinstance(exog, dict):
            # Check that all series have the exogenous variables
            if levels is None and series_names_in_ is not None:
                levels = series_names_in_

            if isinstance(levels, str):
                levels = [levels]

            if levels is not None:
                for level in levels:
                    if level not in exog:
                        raise ValueError(
                            f"Exogenous variables for series '{level}' are missing."
                        )
                    check_exog(
                        exog=exog[level],
                        allow_nan=False,
                        series_id=f"`exog` for series '{level}'",
                    )
                    check_exog_dtypes(
                        exog=exog[level],
                        call_check_exog=False,
                        series_id=f"`exog` for series '{level}'",
                    )

                    # Check that exogenous variables are the same as used in training
                    # Get the name of columns
                    if isinstance(exog[level], pd.Series):
                        exog_names = [exog[level].name]
                    else:
                        exog_names = exog[level].columns.tolist()

                    # NOTE(review): assumes exog_names_in_ is not None whenever
                    # exog_in_ is True — confirm with callers.
                    col_missing = set(exog_names_in_) - set(exog_names)
                    if col_missing:
                        raise ValueError(
                            f"Missing columns for series '{level}' in `exog`. "
                            f"Expected {exog_names_in_}. Got {exog_names}."
                        )
        else:
            check_exog(exog=exog, allow_nan=False)
            check_exog_dtypes(exog=exog, call_check_exog=False)

            # Check that exogenous variables are the same as used in training
            # Get the name of columns
            if isinstance(exog, pd.Series):
                exog_names = [exog.name]
            else:
                exog_names = exog.columns.tolist()

            col_missing = set(exog_names_in_) - set(exog_names)
            if col_missing:
                raise ValueError(
                    f"Missing columns in `exog`. Expected {exog_names_in_}. "
                    f"Got {exog_names}."
                )

    # Check last_window
    # A DataFrame is only screened for NaNs; a Series gets the full check_y
    # validation.
    if last_window is not None:
        if isinstance(last_window, pd.DataFrame):
            if last_window.isna().to_numpy().any():
                raise ValueError("`last_window` has missing values.")
        else:
            check_y(last_window, series_id="`last_window`")

    return

check_preprocess_series(series)

Check and preprocess series argument in ForecasterRecursiveMultiSeries class.

- If `series` is a wide-format pandas DataFrame, each column represents a
different time series, and the index must be either a `DatetimeIndex` or
a `RangeIndex` with frequency or step size, as appropriate
- If `series` is a long-format pandas DataFrame with a MultiIndex, the
first level of the index must contain the series IDs, and the second
level must be a `DatetimeIndex` with the same frequency across all series.
- If series is a dictionary, each key must be a series ID, and each value
must be a named pandas Series. All series must have the same index, which
must be either a `DatetimeIndex` or a `RangeIndex`, and they must share the
same frequency or step size, as appropriate.

When series is a pandas DataFrame, it is converted to a dictionary of pandas Series, where the keys are the series IDs and the values are the Series with the same index as the original DataFrame.

Parameters:

Name Type Description Default
series DataFrame | dict[str, Series | DataFrame]

pandas DataFrame or dictionary of pandas Series/DataFrames

required

Returns:

Type Description
tuple[dict[str, Series], dict[str, Index]]

tuple[dict[str, pd.Series], dict[str, pd.Index]]: - series_dict: Dictionary where keys are series IDs and values are pandas Series. - series_indexes: Dictionary where keys are series IDs and values are the index of each series.

Raises: TypeError: If series is not a pandas DataFrame or a dictionary of pandas Series/DataFrames. TypeError: If the index of series is not a DatetimeIndex or RangeIndex with frequency/step size. ValueError: If the series in series have different frequencies or step sizes. ValueError: If all values of any series are NaN. UserWarning: If series is a wide-format DataFrame, only the first column will be used as series values. UserWarning: If series is a DataFrame (either wide or long format), additional internal transformations are required, which can increase computational time. It is recommended to use a dictionary of pandas Series instead.

Examples:

>>> import pandas as pd
>>> from spotforecast2_safe.forecaster.utils import check_preprocess_series
>>> # Example with wide-format DataFrame
>>> dates = pd.date_range('2020-01-01', periods=5, freq='D')
>>> df_wide = pd.DataFrame({
...     'series_1': [1, 2, 3, 4, 5],
...     'series_2': [5, 4, 3, 2, 1],
... }, index=dates)
>>> series_dict, series_indexes = check_preprocess_series(df_wide)
UserWarning: `series` DataFrame has multiple columns. Only the values of first column, 'series_1', will be used as series values. All other columns will be ignored.
UserWarning: Passing a DataFrame (either wide or long format) as `series` requires additional internal transformations, which can increase computational time.
It is recommended to use a dictionary of pandas Series instead.
>>> print(series_dict['series_1'])
2020-01-01    1
2020-01-02    2
2020-01-03    3
2020-01-04    4
2020-01-05    5
Name: series_1, dtype: int64
>>> print(series_indexes['series_1'])
DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',
               '2020-01-05'],
              dtype='datetime64[ns]', freq='D')
>>> # Example with long-format DataFrame
>>> df_long = pd.DataFrame({
...     'series_id': ['series_1'] * 5 + ['series_2'] * 5,
...     'value': [1, 2, 3, 4, 5, 5, 4, 3, 2, 1],
... }, index=pd.MultiIndex.from_product([['series_1', 'series_2'], dates], names=['series_id', 'date']))
>>> series_dict, series_indexes = check_preprocess_series(df_long)
UserWarning: `series` DataFrame has multiple columns. Only the values of first column, 'value', will be used as series values. All other columns will be ignored.
UserWarning: Passing a DataFrame (either wide or long format) as `series` requires additional internal transformations, which can increase computational time.
It is recommended to use a dictionary of pandas Series instead.
>>> print(series_dict['series_1'])
2020-01-01    1
2020-01-02    2
2020-01-03    3
2020-01-04    4
2020-01-05    5
Name: series_1, dtype: int64
>>> print(series_indexes['series_1'])
DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',
                  '2020-01-05'],
                 dtype='datetime64[ns]', freq='D')
>>> # Example with dictionary of Series
>>> series_dict_input = {
...     'series_1': pd.Series([1, 2, 3, 4, 5], index=dates),
...     'series_2': pd.Series([5, 4, 3, 2, 1], index=dates),
... }
>>> series_dict, series_indexes = check_preprocess_series(series_dict_input)
>>> print(series_dict['series_1'])
2020-01-01    1
2020-01-02    2
2020-01-03    3
2020-01-04    4
2020-01-05    5
Name: series_1, dtype: int64
>>> print(series_indexes['series_1'])
DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',
               '2020-01-05'],
              dtype='datetime64[ns]', freq='D')
>>> # Example with dictionary of DataFrames
>>> df_series_1 = pd.DataFrame({'value': [1, 2, 3, 4, 5]}, index=dates)
>>> df_series_2 = pd.DataFrame({'value': [5, 4, 3, 2, 1]}, index=dates)
>>> series_dict_input = {
...     'series_1': df_series_1,
...     'series_2': df_series_2,
... }
>>> series_dict, series_indexes = check_preprocess_series(series_dict_input)
>>> print(series_dict['series_1'])
2020-01-01    1
2020-01-02    2
2020-01-03    3
2020-01-04    4
2020-01-05    5
Name: series_1, dtype: int64
>>> print(series_indexes['series_1'])
DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',
               '2020-01-05'],
              dtype='datetime64[ns]', freq='D')
Source code in src/spotforecast2_safe/forecaster/utils.py
def check_preprocess_series(
    series: pd.DataFrame | dict[str, pd.Series | pd.DataFrame],
) -> tuple[dict[str, pd.Series], dict[str, pd.Index]]:
    """
    Check and preprocess `series` argument in `ForecasterRecursiveMultiSeries` class.

        - If `series` is a wide-format pandas DataFrame, each column represents a
        different time series, and the index must be either a `DatetimeIndex` or
        a `RangeIndex` with frequency or step size, as appropriate.
        - If `series` is a long-format pandas DataFrame with a MultiIndex, the
        first level of the index must contain the series IDs, and the second
        level must be a `DatetimeIndex` with the same frequency across all series.
        - If `series` is a dictionary, each key must be a series ID, and each value
        must be a named pandas Series (or a single-column pandas DataFrame). All
        series must have an index that is either a `DatetimeIndex` or a
        `RangeIndex`, and they must share the same frequency or step size,
        as appropriate.

    When `series` is a pandas DataFrame, it is converted to a dictionary of pandas
    Series, where the keys are the series IDs and the values are the Series with
    the same index as the original DataFrame.

    Args:
        series: pandas DataFrame or dictionary of pandas Series/DataFrames.

    Returns:
        tuple[dict[str, pd.Series], dict[str, pd.Index]]:
            - series_dict: Dictionary where keys are series IDs and values are pandas Series.
            - series_indexes: Dictionary where keys are series IDs and values are the index of each series.

    Raises:
        TypeError:
            If `series` is not a pandas DataFrame or a dictionary of pandas Series/DataFrames.
        TypeError:
            If the index of `series` is not a DatetimeIndex or RangeIndex with frequency/step size.
        ValueError:
            If the series in `series` have different frequencies or step sizes.
        ValueError:
            If all values of any series are NaN.
        UserWarning:
            If `series` is a long-format DataFrame with multiple columns, only the first column will be used as series values.
        UserWarning:
            If `series` is a DataFrame (either wide or long format), additional internal transformations are required, which can increase computational time.
            It is recommended to use a dictionary of pandas Series instead.

    Examples:
        >>> import pandas as pd
        >>> from spotforecast2_safe.forecaster.utils import check_preprocess_series
        >>> # Example with wide-format DataFrame
        >>> dates = pd.date_range('2020-01-01', periods=5, freq='D')
        >>> df_wide = pd.DataFrame({
        ...     'series_1': [1, 2, 3, 4, 5],
        ...     'series_2': [5, 4, 3, 2, 1],
        ... }, index=dates)
        >>> series_dict, series_indexes = check_preprocess_series(df_wide)
        UserWarning: Passing a DataFrame (either wide or long format) as `series` requires additional internal transformations, which can increase computational time.
        It is recommended to use a dictionary of pandas Series instead.
        >>> print(series_dict['series_1'])
        2020-01-01    1
        2020-01-02    2
        2020-01-03    3
        2020-01-04    4
        2020-01-05    5
        Name: series_1, dtype: int64
        >>> print(series_indexes['series_1'])
        DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',
                       '2020-01-05'],
                      dtype='datetime64[ns]', freq='D')
        >>> # Example with long-format DataFrame
        >>> df_long = pd.DataFrame({
        ...     'series_id': ['series_1'] * 5 + ['series_2'] * 5,
        ...     'value': [1, 2, 3, 4, 5, 5, 4, 3, 2, 1],
        ... }, index=pd.MultiIndex.from_product([['series_1', 'series_2'], dates], names=['series_id', 'date']))
        >>> series_dict, series_indexes = check_preprocess_series(df_long)
        UserWarning: `series` DataFrame has multiple columns. Only the values of first column, 'value', will be used as series values. All other columns will be ignored.
        UserWarning: Passing a DataFrame (either wide or long format) as `series` requires additional internal transformations, which can increase computational time.
        It is recommended to use a dictionary of pandas Series instead.
        >>> print(series_dict['series_1'])
        2020-01-01    1
        2020-01-02    2
        2020-01-03    3
        2020-01-04    4
        2020-01-05    5
        Name: series_1, dtype: int64
        >>> print(series_indexes['series_1'])
        DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',
                       '2020-01-05'],
                      dtype='datetime64[ns]', freq='D')

        >>> # Example with dictionary of Series
        >>> series_dict_input = {
        ...     'series_1': pd.Series([1, 2, 3, 4, 5], index=dates),
        ...     'series_2': pd.Series([5, 4, 3, 2, 1], index=dates),
        ... }
        >>> series_dict, series_indexes = check_preprocess_series(series_dict_input)
        >>> print(series_dict['series_1'])
        2020-01-01    1
        2020-01-02    2
        2020-01-03    3
        2020-01-04    4
        2020-01-05    5
        Name: series_1, dtype: int64
        >>> print(series_indexes['series_1'])
        DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',
                       '2020-01-05'],
                      dtype='datetime64[ns]', freq='D')
        >>> # Example with dictionary of DataFrames
        >>> df_series_1 = pd.DataFrame({'value': [1, 2, 3, 4, 5]}, index=dates)
        >>> df_series_2 = pd.DataFrame({'value': [5, 4, 3, 2, 1]}, index=dates)
        >>> series_dict_input = {
        ...     'series_1': df_series_1,
        ...     'series_2': df_series_2,
        ... }
        >>> series_dict, series_indexes = check_preprocess_series(series_dict_input)
        >>> print(series_dict['series_1'])
        2020-01-01    1
        2020-01-02    2
        2020-01-03    3
        2020-01-04    4
        2020-01-05    5
        Name: series_1, dtype: int64
        >>> print(series_indexes['series_1'])
        DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',
                       '2020-01-05'],
                      dtype='datetime64[ns]', freq='D')
    """
    if not isinstance(series, (pd.DataFrame, dict)):
        raise TypeError(
            f"`series` must be a pandas DataFrame or a dict of DataFrames or Series. "
            f"Got {type(series)}."
        )

    if isinstance(series, pd.DataFrame):

        # Wide format: a flat index means each column is an independent series.
        if not isinstance(series.index, pd.MultiIndex):
            # Validates the index type/frequency; values are not needed here
            # (return_values=False), so both return values are discarded.
            _, _ = check_extract_values_and_index(
                data=series, data_label="`series`", return_values=False
            )
            series = series.copy()
            # Remove the index name so the resulting Series have anonymous indexes.
            series.index.name = None
            series_dict = series.to_dict(orient="series")
        else:
            # Long format: level 0 holds series IDs, level 1 must be datetimes.
            if not isinstance(series.index.levels[1], pd.DatetimeIndex):
                raise TypeError(
                    f"The second level of the MultiIndex in `series` must be a "
                    f"pandas DatetimeIndex with the same frequency for each series. "
                    f"Found {type(series.index.levels[1])}."
                )

            # Only the first column carries the series values; warn if others exist.
            first_col = series.columns[0]
            if len(series.columns) != 1:
                warnings.warn(
                    f"`series` DataFrame has multiple columns. Only the values of "
                    f"first column, '{first_col}', will be used as series values. "
                    f"All other columns will be ignored.",
                    IgnoredArgumentWarning,
                )

            series = series.copy()
            # Drop the name of the datetime level so per-series indexes are anonymous.
            series.index = series.index.set_names([series.index.names[0], None])
            # Split the long frame into one Series per ID, renamed to the ID.
            series_dict = {
                series_id: series.loc[series_id][first_col].rename(series_id)
                for series_id in series.index.levels[0]
            }

        warnings.warn(
            "Passing a DataFrame (either wide or long format) as `series` requires "
            "additional internal transformations, which can increase computational "
            "time. It is recommended to use a dictionary of pandas Series instead. ",
            InputTypeWarning,
        )

    else:

        # Dictionary input: every value must already be a Series or a DataFrame.
        not_valid_series = [
            k for k, v in series.items() if not isinstance(v, (pd.Series, pd.DataFrame))
        ]
        if not_valid_series:
            raise TypeError(
                f"If `series` is a dictionary, all series must be a named "
                f"pandas Series or a pandas DataFrame with a single column. "
                f"Review series: {not_valid_series}"
            )

        # Copy so later normalization never mutates the caller's objects.
        series_dict = {k: v.copy() for k, v in series.items()}

    # Normalize every entry to a named pandas Series, record each index, and
    # collect the frequency (DatetimeIndex) or step (RangeIndex) of each one.
    not_valid_index = []
    indexes_freq = set()
    series_indexes = {}
    for k, v in series_dict.items():
        if isinstance(v, pd.DataFrame):
            # Single-column DataFrames are accepted and squeezed to a Series.
            if v.shape[1] != 1:
                raise ValueError(
                    f"If `series` is a dictionary, all series must be a named "
                    f"pandas Series or a pandas DataFrame with a single column. "
                    f"Review series: '{k}'"
                )
            series_dict[k] = v.iloc[:, 0]

        # Force the Series name to match its dictionary key.
        series_dict[k].name = k
        idx = v.index
        if isinstance(idx, pd.DatetimeIndex):
            # freq may be None; that case is rejected after the loop.
            indexes_freq.add(idx.freq)
        elif isinstance(idx, pd.RangeIndex):
            indexes_freq.add(idx.step)
        else:
            not_valid_index.append(k)

        if v.isna().to_numpy().all():
            raise ValueError(f"All values of series '{k}' are NaN.")

        series_indexes[k] = idx

    if not_valid_index:
        raise TypeError(
            f"If `series` is a dictionary, all series must have a Pandas "
            f"RangeIndex or DatetimeIndex with the same step/frequency. "
            f"Review series: {not_valid_index}"
        )
    if None in indexes_freq:
        raise ValueError(
            "If `series` is a dictionary, all series must have a Pandas "
            "RangeIndex or DatetimeIndex with the same step/frequency. "
            "If it a MultiIndex DataFrame, the second level must be a DatetimeIndex "
            "with the same frequency for each series. Found series with no "
            "frequency or step."
        )
    if not len(indexes_freq) == 1:
        # NOTE(review): sorting assumes the collected freq/step objects are
        # mutually comparable; mixed offset types might not be — TODO confirm.
        raise ValueError(
            f"If `series` is a dictionary, all series must have a Pandas "
            f"RangeIndex or DatetimeIndex with the same step/frequency. "
            f"If it a MultiIndex DataFrame, the second level must be a DatetimeIndex "
            f"with the same frequency for each series. "
            f"Found frequencies: {sorted(indexes_freq)}"
        )

    return series_dict, series_indexes

check_residuals_input(forecaster_name, use_in_sample_residuals, in_sample_residuals_, out_sample_residuals_, use_binned_residuals, in_sample_residuals_by_bin_, out_sample_residuals_by_bin_, levels=None, encoding=None)

Check residuals input arguments in Forecasters.

Parameters:

Name Type Description Default
forecaster_name str

str Forecaster name.

required
use_in_sample_residuals bool

bool Indicates if in sample or out sample residuals are used.

required
in_sample_residuals_ ndarray | dict[str, ndarray] | None

numpy ndarray, dict Residuals of the model when predicting training data.

required
out_sample_residuals_ ndarray | dict[str, ndarray] | None

numpy ndarray, dict Residuals of the model when predicting non training data.

required
use_binned_residuals bool

bool Indicates if residuals are binned.

required
in_sample_residuals_by_bin_ dict[str | int, ndarray | dict[int, ndarray]] | None

dict In sample residuals binned according to the predicted value each residual is associated with.

required
out_sample_residuals_by_bin_ dict[str | int, ndarray | dict[int, ndarray]] | None

dict Out of sample residuals binned according to the predicted value each residual is associated with.

required
levels list[str] | None

list, default None Names of the series (levels) to be predicted (Forecasters multiseries).

None
encoding str | None

str, default None Encoding used to identify the different series (ForecasterRecursiveMultiSeries).

None

Returns:

Type Description
None

None

Examples:

from spotforecast2_safe.forecaster.utils import check_residuals_input import numpy as np forecaster_name = "ForecasterRecursiveMultiSeries" use_in_sample_residuals = True in_sample_residuals_ = np.array([0.1, -0.2]) out_sample_residuals_ = np.array([0.3, -0.1]) use_binned_residuals = False check_residuals_input( forecaster_name, use_in_sample_residuals, in_sample_residuals_, out_sample_residuals_, use_binned_residuals, in_sample_residuals_by_bin_=None, out_sample_residuals_by_bin_=None, levels=['series_1', 'series_2'], encoding='onehot' )

Source code in src/spotforecast2_safe/forecaster/utils.py
def check_residuals_input(
    forecaster_name: str,
    use_in_sample_residuals: bool,
    in_sample_residuals_: np.ndarray | dict[str, np.ndarray] | None,
    out_sample_residuals_: np.ndarray | dict[str, np.ndarray] | None,
    use_binned_residuals: bool,
    in_sample_residuals_by_bin_: (
        dict[str | int, np.ndarray | dict[int, np.ndarray]] | None
    ),
    out_sample_residuals_by_bin_: (
        dict[str | int, np.ndarray | dict[int, np.ndarray]] | None
    ),
    levels: list[str] | None = None,
    encoding: str | None = None,
) -> None:
    """
    Check residuals input arguments in Forecasters.

    Args:
        forecaster_name: str
            Forecaster name.
        use_in_sample_residuals: bool
            Indicates if in sample or out sample residuals are used.
        in_sample_residuals_: numpy ndarray, dict
            Residuals of the model when predicting training data.
        out_sample_residuals_: numpy ndarray, dict
            Residuals of the model when predicting non training data.
        use_binned_residuals: bool
            Indicates if residuals are binned.
        in_sample_residuals_by_bin_: dict
            In sample residuals binned according to the predicted value each residual
            is associated with.
        out_sample_residuals_by_bin_: dict
            Out of sample residuals binned according to the predicted value each residual
            is associated with.
        levels: list, default None
            Names of the series (levels) to be predicted (Forecasters multiseries).
        encoding: str, default None
            Encoding used to identify the different series (ForecasterRecursiveMultiSeries).

    Returns:
        None

    Examples:
        from spotforecast2_safe.forecaster.utils import check_residuals_input
        import numpy as np
        forecaster_name = "ForecasterRecursiveMultiSeries"
        use_in_sample_residuals = True
        in_sample_residuals_ = np.array([0.1, -0.2
        out_sample_residuals_ = np.array([0.3, -0.1])
        use_binned_residuals = False
        check_residuals_input(
            forecaster_name,
            use_in_sample_residuals,
            in_sample_residuals_,
            out_sample_residuals_,
            use_binned_residuals,
            in_sample_residuals_by_bin_=None,
            out_sample_residuals_by_bin_=None,
            levels=['series_1', 'series_2'],
            encoding='onehot'
        )
    """

    forecasters_multiseries = (
        "ForecasterRecursiveMultiSeries",
        "ForecasterDirectMultiVariate",
        "ForecasterRnn",
    )

    if use_in_sample_residuals:
        if use_binned_residuals:
            residuals = in_sample_residuals_by_bin_
            literal = "in_sample_residuals_by_bin_"
        else:
            residuals = in_sample_residuals_
            literal = "in_sample_residuals_"

        # Check if residuals are empty or None
        is_empty = (
            residuals is None
            or (isinstance(residuals, dict) and not residuals)
            or (isinstance(residuals, np.ndarray) and residuals.size == 0)
        )
        if is_empty:
            raise ValueError(
                f"`forecaster.{literal}` is either None or empty. Use "
                f"`store_in_sample_residuals = True` when fitting the forecaster "
                f"or use the `set_in_sample_residuals()` method before predicting."
            )

        if forecaster_name in forecasters_multiseries:
            if encoding is not None:
                unknown_levels = set(levels) - set(residuals.keys())
                if unknown_levels:
                    warnings.warn(
                        f"`levels` {unknown_levels} are not present in `forecaster.{literal}`, "
                        f"most likely because they were not present in the training data. "
                        f"A random sample of the residuals from other levels will be used. "
                        f"This can lead to inaccurate intervals for the unknown levels.",
                        UnknownLevelWarning,
                    )
    else:
        if use_binned_residuals:
            residuals = out_sample_residuals_by_bin_
            literal = "out_sample_residuals_by_bin_"
        else:
            residuals = out_sample_residuals_
            literal = "out_sample_residuals_"

        is_empty = (
            residuals is None
            or (isinstance(residuals, dict) and not residuals)
            or (isinstance(residuals, np.ndarray) and residuals.size == 0)
        )
        if is_empty:
            raise ValueError(
                f"`forecaster.{literal}` is either None or empty. Use "
                f"`set_out_sample_residuals()` method before predicting."
            )

        if forecaster_name in forecasters_multiseries:
            if encoding is not None:
                unknown_levels = set(levels) - set(residuals.keys())
                if unknown_levels:
                    warnings.warn(
                        f"`levels` {unknown_levels} are not present in `forecaster.{literal}`, "
                        f"most likely because they were not present in the training data. "
                        f"A random sample of the residuals from other levels will be used. "
                        f"This can lead to inaccurate intervals for the unknown levels.",
                        UnknownLevelWarning,
                    )

check_select_fit_kwargs(estimator, fit_kwargs=None)

Check if fit_kwargs is a dict and select only keys used by estimator's fit.

This function validates that fit_kwargs is a dictionary, warns about unused arguments, removes 'sample_weight' (which should be handled via weight_func), and returns a dictionary containing only the arguments accepted by the estimator's fit method.

Parameters:

Name Type Description Default
estimator Any

Scikit-learn compatible estimator.

required
fit_kwargs Optional[dict]

Dictionary of arguments to pass to the estimator's fit method.

None

Returns:

Type Description
dict

Dictionary with only the arguments accepted by the estimator's fit method.

Raises:

Type Description
TypeError

If fit_kwargs is not a dict.

Warns:

Type Description
IgnoredArgumentWarning

If fit_kwargs contains keys not used by fit method, or if 'sample_weight' is present (it gets removed).

Examples:

>>> from sklearn.linear_model import Ridge
>>> from spotforecast2_safe.utils.forecaster_config import check_select_fit_kwargs
>>>
>>> estimator = Ridge()
>>> # Valid argument for Ridge.fit
>>> kwargs = {"sample_weight": [1, 1], "invalid_arg": 10}
>>> # sample_weight is removed (should be passed via weight_func in forecaster)
>>> # invalid_arg is ignored
>>> filtered = check_select_fit_kwargs(estimator, kwargs)
>>> filtered
{}
Source code in src/spotforecast2_safe/utils/forecaster_config.py
def check_select_fit_kwargs(estimator: Any, fit_kwargs: Optional[dict] = None) -> dict:
    """
    Check if `fit_kwargs` is a dict and select only keys used by estimator's `fit`.

    This function validates that fit_kwargs is a dictionary, warns about unused arguments,
    removes 'sample_weight' (which should be handled via weight_func), and returns
    a dictionary containing only the arguments accepted by the estimator's fit method.
    The input dictionary is never modified.

    Args:
        estimator: Scikit-learn compatible estimator.
        fit_kwargs: Dictionary of arguments to pass to the estimator's fit method.

    Returns:
        Dictionary with only the arguments accepted by the estimator's fit method.

    Raises:
        TypeError: If fit_kwargs is not a dict.

    Warnings:
        IgnoredArgumentWarning: If fit_kwargs contains keys not used by fit method,
            or if 'sample_weight' is present (it gets removed).

    Examples:
        >>> from sklearn.linear_model import Ridge
        >>> from spotforecast2_safe.utils.forecaster_config import check_select_fit_kwargs
        >>>
        >>> estimator = Ridge()
        >>> # Valid argument for Ridge.fit
        >>> kwargs = {"sample_weight": [1, 1], "invalid_arg": 10}
        >>> # sample_weight is removed (should be passed via weight_func in forecaster)
        >>> # invalid_arg is ignored
        >>> filtered = check_select_fit_kwargs(estimator, kwargs)
        >>> filtered
        {}
    """
    import inspect
    import warnings

    # Import IgnoredArgumentWarning if available, otherwise define locally
    try:
        from spotforecast2_safe.exceptions import IgnoredArgumentWarning
    except ImportError:

        class IgnoredArgumentWarning(UserWarning):
            """Warning for ignored arguments."""

            pass

    if fit_kwargs is None:
        return {}

    if not isinstance(fit_kwargs, dict):
        raise TypeError(
            f"Argument `fit_kwargs` must be a dict. Got {type(fit_kwargs)}."
        )

    # Work on a shallow copy: the original implementation deleted
    # 'sample_weight' from the caller's dictionary as a side effect.
    fit_kwargs = dict(fit_kwargs)

    # Get parameters accepted by estimator.fit
    fit_params = inspect.signature(estimator.fit).parameters

    # Warn about keys the estimator's fit method does not accept.
    non_used_keys = [k for k in fit_kwargs.keys() if k not in fit_params]
    if non_used_keys:
        warnings.warn(
            f"Argument/s {non_used_keys} ignored since they are not used by the "
            f"estimator's `fit` method.",
            IgnoredArgumentWarning,
        )

    # sample_weight must come through weight_func, never through fit_kwargs.
    if "sample_weight" in fit_kwargs:
        warnings.warn(
            "The `sample_weight` argument is ignored. Use `weight_func` to pass "
            "a function that defines the individual weights for each sample "
            "based on its index.",
            IgnoredArgumentWarning,
        )
        del fit_kwargs["sample_weight"]

    # Keep only the keyword arguments allowed by the estimator's `fit` method.
    return {k: v for k, v in fit_kwargs.items() if k in fit_params}

check_y(y, series_id='`y`')

Validate that y is a pandas Series without missing values.

This function ensures that the input time series meets the basic requirements for forecasting: it must be a pandas Series and must not contain any NaN values.

Parameters:

Name Type Description Default
y Any

Time series values to validate.

required
series_id str

Identifier of the series used in error messages. Defaults to "y".

'`y`'

Raises:

Type Description
TypeError

If y is not a pandas Series.

ValueError

If y contains missing (NaN) values.

Examples:

>>> import pandas as pd
>>> import numpy as np
>>> from spotforecast2_safe.utils.validation import check_y
>>>
>>> # Valid series
>>> y = pd.Series([1, 2, 3, 4, 5])
>>> check_y(y)  # No error
>>>
>>> # Invalid: not a Series
>>> try:
...     check_y([1, 2, 3])
... except TypeError as e:
...     print(f"Error: {e}")
Error: `y` must be a pandas Series with a DatetimeIndex or a RangeIndex. Found <class 'list'>.
>>>
>>> # Invalid: contains NaN
>>> y_with_nan = pd.Series([1, 2, np.nan, 4])
>>> try:
...     check_y(y_with_nan)
... except ValueError as e:
...     print(f"Error: {e}")
Error: `y` has missing values.
Source code in src/spotforecast2_safe/utils/validation.py
def check_y(y: Any, series_id: str = "`y`") -> None:
    """
    Ensure that the input is a pandas Series free of missing values.

    A valid forecasting target must be a pandas Series and must not contain
    any NaN entries; anything else is rejected with an explicit error.

    Args:
        y: Candidate time series to validate.
        series_id: Label used to identify the series in error messages.
            Defaults to "`y`".

    Raises:
        TypeError: If `y` is not a pandas Series.
        ValueError: If `y` contains missing (NaN) values.

    Examples:
        >>> import pandas as pd
        >>> check_y(pd.Series([1, 2, 3]))  # passes silently
    """
    if isinstance(y, pd.Series):
        if y.isna().to_numpy().any():
            raise ValueError(f"{series_id} has missing values.")
        return

    raise TypeError(
        f"{series_id} must be a pandas Series with a DatetimeIndex or a RangeIndex. "
        f"Found {type(y)}."
    )

date_to_index_position(index, date_input, method='prediction', date_literal='steps', kwargs_pd_to_datetime={})

Transform a datetime string or pandas Timestamp to an integer. The integer represents the position of the datetime in the index.

Parameters:

Name Type Description Default
index Index

pandas Index Original datetime index (must be a pandas DatetimeIndex if date_input is not an int).

required
date_input int | str | Timestamp

int, str, pandas Timestamp Datetime to transform to integer.

  • If int, returns the same integer.
  • If str or pandas Timestamp, it is converted and expanded into the index.
required
method str

str, default 'prediction' Can be 'prediction' or 'validation'.

  • If 'prediction', the date must be later than the last date in the index.
  • If 'validation', the date must be within the index range.
'prediction'
date_literal str

str, default 'steps' Variable name used in error messages.

'steps'
kwargs_pd_to_datetime dict

dict, default {} Additional keyword arguments to pass to pd.to_datetime().

{}

Returns:

Name Type Description
int int

date_input transformed to integer position in the index.

int
  • If date_input is an integer, it returns the same integer.
int
  • If method is 'prediction', number of steps to predict from the last
int

date in the index.

int
  • If method is 'validation', position plus one of the date in the index,
int

this is done to include the target date in the training set when using

int

pandas iloc with slices.

Raises:

Type Description
ValueError

If method is not 'prediction' or 'validation'.

TypeError

If date_input is not an int, str, or pandas Timestamp.

TypeError

If index is not a pandas DatetimeIndex when date_input is not an int.

ValueError

If date_input is a date and does not meet the requirements based on the method argument.

Examples:

from spotforecast2_safe.forecaster.utils import date_to_index_position import pandas as pd index = pd.date_range(start='2020-01-01', periods=10, freq='D')

Using an integer input

position = date_to_index_position(index, 5) print(position)

Output: 5

Using a date input for prediction

position = date_to_index_position(index, '2020-01-15', method='prediction') print(position)

Output: 5 (number of steps from the last date in the index to the target date)

Using a date input for validation

position = date_to_index_position(index, '2020-01-05', method='validation') print(position)

Output: 5 (position plus one of the target date in the index)

Source code in src/spotforecast2_safe/forecaster/utils.py
def date_to_index_position(
    index: pd.Index,
    date_input: int | str | pd.Timestamp,
    method: str = "prediction",
    date_literal: str = "steps",
    kwargs_pd_to_datetime: dict = {},
) -> int:
    """
    Transform a datetime string or pandas Timestamp to an integer. The integer
    represents the position of the datetime in the index.

    Args:
        index: pandas Index
            Original datetime index (must be a pandas DatetimeIndex if `date_input`
            is not an int).
        date_input: int, str, pandas Timestamp
            Datetime to transform to integer.

            - If int, returns the same integer.
            - If str or pandas Timestamp, it is converted and expanded into the index.
        method: str, default 'prediction'
            Can be 'prediction' or 'validation'.

            - If 'prediction', the date must be later than the last date in the index.
            - If 'validation', the date must be within the index range.
        date_literal: str, default 'steps'
            Variable name used in error messages.
        kwargs_pd_to_datetime: dict, default {}
            Additional keyword arguments to pass to `pd.to_datetime()`.

    Returns:
        int:
            `date_input` transformed to integer position in the `index`.

        + If `date_input` is an integer, it returns the same integer.
        + If method is 'prediction', number of steps to predict from the last
        date in the index.
        + If method is 'validation', position plus one of the date in the index,
        this is done to include the target date in the training set when using
        pandas iloc with slices.

    Raises:
        ValueError: If `method` is not 'prediction' or 'validation'.
        TypeError: If `date_input` is not an int, str, or pandas Timestamp.
        TypeError: If `index` is not a pandas DatetimeIndex when `date_input` is not an int.
        ValueError: If `date_input` is a date and does not meet the requirements based on the `method` argument.

    Examples:
        from spotforecast2_safe.forecaster.utils import date_to_index_position
        import pandas as pd
        index = pd.date_range(start='2020-01-01', periods=10, freq='D')
        # Using an integer input
        position = date_to_index_position(index, 5)
        print(position)
        # Output: 5
        # Using a date input for prediction
        position = date_to_index_position(index, '2020-01-15', method='prediction')
        print(position)
        # Output: 5 (number of steps from the last date in the index to the target date)
        # Using a date input for validation
        position = date_to_index_position(index, '2020-01-05', method='validation')
        print(position)
        # Output: 5 (position plus one of the target date in the index)
    """

    if method not in ["prediction", "validation"]:
        raise ValueError("`method` must be 'prediction' or 'validation'.")

    # Initialize output; will be set in all valid code paths below
    output: int = 0

    if isinstance(date_input, (str, pd.Timestamp)):
        if not isinstance(index, pd.DatetimeIndex):
            raise TypeError(
                f"Index must be a pandas DatetimeIndex when `{date_literal}` is "
                f"not an integer. Check input series or last window."
            )

        target_date = pd.to_datetime(date_input, **kwargs_pd_to_datetime)
        last_date = pd.to_datetime(index[-1])

        if method == "prediction":
            if target_date <= last_date:
                raise ValueError(
                    "If `steps` is a date, it must be greater than the last date "
                    "in the index."
                )
            span_index = pd.date_range(
                start=last_date, end=target_date, freq=index.freq
            )
            output = len(span_index) - 1
        elif method == "validation":
            first_date = pd.to_datetime(index[0])
            if target_date < first_date or target_date > last_date:
                raise ValueError(
                    "If `initial_train_size` is a date, it must be greater than "
                    "the first date in the index and less than the last date."
                )
            span_index = pd.date_range(
                start=first_date, end=target_date, freq=index.freq
            )
            output = len(span_index)

    elif isinstance(date_input, (int, np.integer)):
        output = date_input

    else:
        raise TypeError(
            f"`{date_literal}` must be an integer, string, or pandas Timestamp."
        )

    return output

exog_to_direct(exog, steps)

Transforms exog to a pandas DataFrame with the shape needed for Direct forecasting.

Parameters:

Name Type Description Default
exog Series | DataFrame

pandas Series, pandas DataFrame Exogenous variables.

required
steps int

int Number of steps that will be predicted using exog.

required

Returns:

Type Description
tuple[DataFrame, list[str]]

tuple[pd.DataFrame, list[str]]: exog_direct: pandas DataFrame Exogenous variables transformed. exog_direct_names: list Names of the columns of the exogenous variables transformed. Only created if exog is a pandas Series or DataFrame.

Source code in src/spotforecast2_safe/forecaster/utils.py
def exog_to_direct(
    exog: pd.Series | pd.DataFrame, steps: int
) -> tuple[pd.DataFrame, list[str]]:
    """
    Transforms `exog` to a pandas DataFrame with the shape needed for Direct
    forecasting.

    Args:
        exog: pandas Series, pandas DataFrame
            Exogenous variables.
        steps: int
            Number of steps that will be predicted using exog.

    Returns:
        tuple[pd.DataFrame, list[str]]:
            exog_direct: pandas DataFrame
                Exogenous variables transformed.
            exog_direct_names: list
                Names of the columns of the exogenous variables transformed. Only
                created if `exog` is a pandas Series or DataFrame.
    """

    if not isinstance(exog, (pd.Series, pd.DataFrame)):
        raise TypeError(
            f"`exog` must be a pandas Series or DataFrame. Got {type(exog)}."
        )

    if isinstance(exog, pd.Series):
        exog = exog.to_frame()

    n_rows = len(exog)
    exog_idx = exog.index
    exog_cols = exog.columns
    exog_direct = []
    for i in range(steps):
        exog_step = exog.iloc[i : n_rows - (steps - 1 - i),]
        exog_step.index = pd.RangeIndex(len(exog_step))
        exog_step.columns = [f"{col}_step_{i + 1}" for col in exog_cols]
        exog_direct.append(exog_step)

    exog_direct = pd.concat(exog_direct, axis=1) if steps > 1 else exog_direct[0]

    exog_direct_names = exog_direct.columns.to_list()
    exog_direct.index = exog_idx[-len(exog_direct) :]

    return exog_direct, exog_direct_names

exog_to_direct_numpy(exog, steps)

Transforms exog to numpy ndarray with the shape needed for Direct forecasting.

Parameters:

Name Type Description Default
exog ndarray | Series | DataFrame

numpy ndarray, pandas Series, pandas DataFrame Exogenous variables, shape(samples,). If exog is a pandas format, the direct exog names are created.

required
steps int

int Number of steps that will be predicted using exog.

required

Returns:

Type Description
tuple[ndarray, list[str] | None]

tuple[np.ndarray, list[str] | None]: exog_direct: numpy ndarray Exogenous variables transformed. exog_direct_names: list, None Names of the columns of the exogenous variables transformed. Only created if exog is a pandas Series or DataFrame.

Examples:

from spotforecast2_safe.forecaster.utils import exog_to_direct_numpy import numpy as np exog = np.array([10, 20, 30, 40, 50]) steps = 3 exog_direct, exog_direct_names = exog_to_direct_numpy(exog, steps) print(exog_direct) [[10 20 30] [20 30 40] [30 40 50]] print(exog_direct_names) None

Source code in src/spotforecast2_safe/forecaster/utils.py
def exog_to_direct_numpy(
    exog: np.ndarray | pd.Series | pd.DataFrame, steps: int
) -> tuple[np.ndarray, list[str] | None]:
    """
    Transforms `exog` to numpy ndarray with the shape needed for Direct
    forecasting.

    Args:
        exog: numpy ndarray, pandas Series, pandas DataFrame
            Exogenous variables, shape(samples,). If exog is a pandas format, the
            direct exog names are created.
        steps: int
            Number of steps that will be predicted using exog.

    Returns:
        tuple[np.ndarray, list[str] | None]:
            exog_direct: numpy ndarray
                Exogenous variables transformed.
            exog_direct_names: list, None
                Names of the columns of the exogenous variables transformed. Only
                created if `exog` is a pandas Series or DataFrame.

    Examples:
        from spotforecast2_safe.forecaster.utils import exog_to_direct_numpy
        import numpy as np
        exog = np.array([10, 20, 30, 40, 50])
        steps = 3
        exog_direct, exog_direct_names = exog_to_direct_numpy(exog, steps)
        print(exog_direct)
            [[10 20 30]
            [20 30 40]
            [30 40 50]]
        print(exog_direct_names)
        None
    """

    if isinstance(exog, (pd.Series, pd.DataFrame)):
        exog_cols = exog.columns if isinstance(exog, pd.DataFrame) else [exog.name]
        exog_direct_names = [
            f"{col}_step_{i + 1}" for i in range(steps) for col in exog_cols
        ]
        exog = exog.to_numpy()
    else:
        exog_direct_names = None
        if not isinstance(exog, np.ndarray):
            raise TypeError(
                f"`exog` must be a numpy ndarray, pandas Series or DataFrame. "
                f"Got {type(exog)}."
            )

    if exog.ndim == 1:
        exog = np.expand_dims(exog, axis=1)

    n_rows = len(exog)
    exog_direct = [exog[i : n_rows - (steps - 1 - i)] for i in range(steps)]
    exog_direct = np.concatenate(exog_direct, axis=1) if steps > 1 else exog_direct[0]

    return exog_direct, exog_direct_names

expand_index(index, steps)

Create a new index extending from the end of the original index.

This function generates future indices for forecasting by extending the time series index by a specified number of steps. Handles both DatetimeIndex and RangeIndex appropriately.

Parameters:

Name Type Description Default
index Union[Index, None]

Original pandas Index (DatetimeIndex or RangeIndex). If None, creates a RangeIndex starting from 0.

required
steps int

Number of future steps to generate.

required

Returns:

Type Description
Index

New pandas Index with steps future periods.

Raises:

Type Description
TypeError

If steps is not an integer, or if index is neither DatetimeIndex nor RangeIndex.

Examples:

>>> import pandas as pd
>>> from spotforecast2_safe.utils.data_transform import expand_index
>>>
>>> # DatetimeIndex
>>> dates = pd.date_range("2023-01-01", periods=5, freq="D")
>>> new_index = expand_index(dates, 3)
>>> new_index
DatetimeIndex(['2023-01-06', '2023-01-07', '2023-01-08'], dtype='datetime64[ns]', freq='D')
>>>
>>> # RangeIndex
>>> range_idx = pd.RangeIndex(start=0, stop=10)
>>> new_index = expand_index(range_idx, 5)
>>> new_index
RangeIndex(start=10, stop=15, step=1)
>>>
>>> # None index (creates new RangeIndex)
>>> new_index = expand_index(None, 3)
>>> new_index
RangeIndex(start=0, stop=3, step=1)
>>>
>>> # Invalid: steps not an integer
>>> try:
...     expand_index(dates, 3.5)
... except TypeError as e:
...     print("Error: steps must be an integer")
Error: steps must be an integer
Source code in src/spotforecast2_safe/utils/data_transform.py
def expand_index(index: Union[pd.Index, None], steps: int) -> pd.Index:
    """
    Build the index for `steps` future periods following `index`.

    Used to generate forecasting horizons: the returned index starts right
    after the last entry of the original one. Both DatetimeIndex and
    RangeIndex are supported.

    Args:
        index: Original pandas Index (DatetimeIndex or RangeIndex). If None,
            a fresh RangeIndex starting at 0 is returned.
        steps: Number of future steps to generate.

    Returns:
        pandas Index covering the `steps` periods after the end of `index`.

    Raises:
        TypeError: If `steps` is not an integer, or if `index` is neither a
            DatetimeIndex nor a RangeIndex.

    Examples:
        >>> import pandas as pd
        >>> dates = pd.date_range("2023-01-01", periods=5, freq="D")
        >>> expand_index(dates, 3)
        DatetimeIndex(['2023-01-06', '2023-01-07', '2023-01-08'], dtype='datetime64[ns]', freq='D')
        >>> expand_index(pd.RangeIndex(start=0, stop=10), 5)
        RangeIndex(start=10, stop=15, step=1)
        >>> expand_index(None, 3)
        RangeIndex(start=0, stop=3, step=1)
    """
    if not isinstance(steps, (int, np.integer)):
        raise TypeError(f"`steps` must be an integer. Got {type(steps)}.")

    # Normalize numpy integer scalars to a builtin int.
    steps = int(steps)

    if not isinstance(index, pd.Index):
        # No reference index available: start counting from zero.
        return pd.RangeIndex(start=0, stop=steps)

    if isinstance(index, pd.DatetimeIndex):
        return pd.date_range(
            start=index[-1] + index.freq, periods=steps, freq=index.freq
        )

    if isinstance(index, pd.RangeIndex):
        next_start = index[-1] + 1
        return pd.RangeIndex(start=next_start, stop=next_start + steps)

    raise TypeError(
        "Argument `index` must be a pandas DatetimeIndex or RangeIndex."
    )

get_exog_dtypes(exog)

Extract and store the data types of exogenous variables.

This function returns a dictionary mapping column names to their data types. For Series, uses the series name as the key. For DataFrames, uses all column names.

Parameters:

Name Type Description Default
exog Union[Series, DataFrame]

Exogenous variable/s (Series or DataFrame).

required

Returns:

Type Description
Dict[str, type]

Dictionary mapping variable names to their pandas dtypes.

Examples:

>>> import pandas as pd
>>> import numpy as np
>>> from spotforecast2_safe.utils.validation import get_exog_dtypes
>>>
>>> # DataFrame with mixed types
>>> exog_df = pd.DataFrame({
...     "temp": pd.Series([20.5, 21.3, 22.1], dtype='float64'),
...     "day": pd.Series([1, 2, 3], dtype='int64'),
...     "is_weekend": pd.Series([False, False, True], dtype='bool')
... })
>>> dtypes = get_exog_dtypes(exog_df)
>>> dtypes['temp']
dtype('float64')
>>> dtypes['day']
dtype('int64')
>>>
>>> # Series
>>> exog_series = pd.Series([1.0, 2.0, 3.0], name="temperature", dtype='float64')
>>> dtypes = get_exog_dtypes(exog_series)
>>> dtypes
{'temperature': dtype('float64')}
Source code in src/spotforecast2_safe/utils/validation.py
def get_exog_dtypes(exog: Union[pd.Series, pd.DataFrame]) -> Dict[str, type]:
    """
    Map each exogenous variable to its pandas dtype.

    A DataFrame yields one entry per column; a Series yields a single entry
    keyed by the series name.

    Args:
        exog: Exogenous variable/s (Series or DataFrame).

    Returns:
        Dictionary mapping variable names to their pandas dtypes.

    Examples:
        >>> import pandas as pd
        >>> s = pd.Series([1.0, 2.0], name="temperature", dtype='float64')
        >>> get_exog_dtypes(s)
        {'temperature': dtype('float64')}
    """
    if isinstance(exog, pd.DataFrame):
        return exog.dtypes.to_dict()
    # Series: key the single dtype by the series name.
    return {exog.name: exog.dtypes}

get_style_repr_html(is_fitted=False)

Generate CSS style for HTML representation of the Forecaster.

Creates a unique CSS style block with a container ID for rendering forecaster objects in Jupyter notebooks or HTML documents. The styling provides a clean, monospace display with a light gray background.

Parameters:

Name Type Description Default
is_fitted bool

Parameter to indicate if the Forecaster has been fitted. Currently not used in styling but reserved for future extensions.

False

Returns:

Name Type Description
tuple Tuple[str, str]

A tuple containing: - style (str): CSS style block as a string with unique container class. - unique_id (str): Unique 8-character ID for the container element.

Examples:

>>> style, uid = get_style_repr_html(is_fitted=True)
>>> print(f"Container ID: {uid}")
Container ID: a1b2c3d4
>>> print(f"Style contains CSS: {'container-' in style}")
Style contains CSS: True

Using in HTML rendering:

>>> style, uid = get_style_repr_html(is_fitted=False)
>>> html = f"{style}<div class='container-{uid}'>Forecaster Info</div>"
>>> print("background-color" in html)
True
Source code in src/spotforecast2_safe/forecaster/utils.py
def get_style_repr_html(is_fitted: bool = False) -> Tuple[str, str]:
    """Build the CSS snippet used in the HTML repr of a Forecaster.

    A short random identifier is generated so that several forecaster
    representations rendered on the same page do not share CSS rules. The
    styling gives a clean monospace display with a light gray background.

    Args:
        is_fitted: Whether the Forecaster has been fitted. Currently unused
            in the styling; reserved for future extensions.

    Returns:
        tuple: A tuple containing:
            - style (str): CSS ``<style>`` block scoped to the unique container
              class ``container-<unique_id>``.
            - unique_id (str): Unique 8-character hexadecimal identifier for
              the container element.

    Examples:
        >>> style, uid = get_style_repr_html(is_fitted=False)
        >>> len(uid)
        8
        >>> f"container-{uid}" in style
        True
    """

    # 8 hex chars of a v4 UUID are enough to avoid collisions on one page.
    container_id = uuid.uuid4().hex[:8]
    css_block = f"""
    <style>
        .container-{container_id} {{
            font-family: monospace;
            background-color: #f0f0f0;
            padding: 10px;
            border-radius: 5px;
        }}
    </style>
    """
    return css_block, container_id

initialize_estimator(estimator=None, regressor=None)

Helper to handle the deprecation of 'regressor' in favor of 'estimator'. Returns the valid estimator object.

Parameters:

Name Type Description Default
estimator object | None

estimator or pipeline compatible with the scikit-learn API, default None An instance of an estimator or pipeline compatible with the scikit-learn API.

None
regressor object | None

estimator or pipeline compatible with the scikit-learn API, default None Deprecated. An instance of an estimator or pipeline compatible with the scikit-learn API.

None

Returns:

Type Description
object | None

estimator or pipeline compatible with the scikit-learn API The valid estimator object.

Raises:

Type Description
ValueError

If both estimator and regressor are provided. Use only estimator.

Warning

If regressor is provided, a FutureWarning is raised indicating that it is deprecated and will be removed in a future version.

Examples:

from spotforecast2_safe.forecaster.utils import initialize_estimator from sklearn.linear_model import LinearRegression

Using the estimator argument

estimator = LinearRegression() result = initialize_estimator(estimator=estimator) print(result) LinearRegression()

Using the deprecated regressor argument

regressor = LinearRegression() result = initialize_estimator(regressor=regressor) print(result) LinearRegression()

Source code in src/spotforecast2_safe/forecaster/utils.py
def initialize_estimator(
    estimator: object | None = None, regressor: object | None = None
) -> None:
    """
    Helper to handle the deprecation of 'regressor' in favor of 'estimator'.
    Returns the valid estimator object.

    Args:
        estimator: estimator or pipeline compatible with the scikit-learn API, default None
            An instance of a estimator or pipeline compatible with the scikit-learn API.
        regressor: estimator or pipeline compatible with the scikit-learn API, default None
            Deprecated. An instance of a estimator or pipeline compatible with the
            scikit-learn API.

    Returns:
        estimator or pipeline compatible with the scikit-learn API
            The valid estimator object.

    Raises:
        ValueError: If both `estimator` and `regressor` are provided. Use only `estimator`.
        Warning: If `regressor` is provided, a FutureWarning is raised indicating that it is deprecated and will be removed in a future version.

    Examples:
        from spotforecast2_safe.forecaster.utils import initialize_estimator
        from sklearn.linear_model import LinearRegression
        # Using the `estimator` argument
        estimator = LinearRegression()
        result = initialize_estimator(estimator=estimator)
        print(result)
        LinearRegression()
        # Using the deprecated `regressor` argument
        regressor = LinearRegression()
        result = initialize_estimator(regressor=regressor)
        print(result)
        LinearRegression()

    """

    if regressor is not None:
        warnings.warn(
            "The `regressor` argument is deprecated and will be removed in a future "
            "version. Please use `estimator` instead.",
            FutureWarning,
            stacklevel=3,  # Important: to point to the user's code
        )
        if estimator is not None:
            raise ValueError(
                "Both `estimator` and `regressor` were provided. Use only `estimator`."
            )
        return regressor

    return estimator

initialize_lags(forecaster_name, lags)

Validate and normalize lag specification for forecasting.

This function converts various lag specifications (int, list, tuple, range, ndarray) into a standardized format: sorted numpy array, lag names, and maximum lag value.

Parameters:

Name Type Description Default
forecaster_name str

Name of the forecaster class for error messages.

required
lags Any

Lag specification in one of several formats: - int: Creates lags from 1 to lags (e.g., 5 → [1,2,3,4,5]) - list/tuple/range: Converted to numpy array - numpy.ndarray: Validated and used directly - None: Returns (None, None, None)

required

Returns:

Type Description
Tuple[Optional[ndarray], Optional[List[str]], Optional[int]]

Tuple containing:
  • lags: Sorted numpy array of lag values (or None)
  • lags_names: List of lag names like ['lag_1', 'lag_2', ...] (or None)
  • max_lag: Maximum lag value (or None)

Raises:

Type Description
ValueError

If lags < 1, empty array, or not 1-dimensional.

TypeError

If lags is not an integer, not in the right format for the forecaster, or array contains non-integer values.

Examples:

>>> import numpy as np
>>> from spotforecast2_safe.utils.forecaster_config import initialize_lags
>>>
>>> # Integer input
>>> lags, names, max_lag = initialize_lags("ForecasterRecursive", 3)
>>> lags
array([1, 2, 3])
>>> names
['lag_1', 'lag_2', 'lag_3']
>>> max_lag
3
>>>
>>> # List input
>>> lags, names, max_lag = initialize_lags("ForecasterRecursive", [1, 3, 5])
>>> lags
array([1, 3, 5])
>>> names
['lag_1', 'lag_3', 'lag_5']
>>>
>>> # Range input
>>> lags, names, max_lag = initialize_lags("ForecasterRecursive", range(1, 4))
>>> lags
array([1, 2, 3])
>>>
>>> # None input
>>> lags, names, max_lag = initialize_lags("ForecasterRecursive", None)
>>> lags is None
True
>>>
>>> # Invalid: lags < 1
>>> try:
...     initialize_lags("ForecasterRecursive", 0)
... except ValueError as e:
...     print("Error: Minimum value of lags allowed is 1")
Error: Minimum value of lags allowed is 1
>>>
>>> # Invalid: negative lags
>>> try:
...     initialize_lags("ForecasterRecursive", [1, -2, 3])
... except ValueError as e:
...     print("Error: Minimum value of lags allowed is 1")
Error: Minimum value of lags allowed is 1
Source code in src/spotforecast2_safe/utils/forecaster_config.py
def initialize_lags(
    forecaster_name: str, lags: Any
) -> Tuple[Optional[np.ndarray], Optional[List[str]], Optional[int]]:
    """
    Validate and normalize lag specification for forecasting.

    This function converts various lag specifications (int, numpy integer,
    list, tuple, range, ndarray) into a standardized format: sorted numpy
    array, lag names, and maximum lag value.

    Args:
        forecaster_name: Name of the forecaster class for error messages.
        lags: Lag specification in one of several formats:
            - int or numpy integer: Creates lags from 1 to lags
              (e.g., 5 → [1,2,3,4,5])
            - list/tuple/range: Converted to numpy array
            - numpy.ndarray: Validated and used directly
            - None: Returns (None, None, None)

    Returns:
        Tuple containing:
        - lags: Sorted numpy array of lag values (or None)
        - lags_names: List of lag names like ['lag_1', 'lag_2', ...] (or None)
        - max_lag: Maximum lag value (or None)

        Note: an empty array or sequence is treated like ``lags=None`` and
        yields (None, None, None).

    Raises:
        ValueError: If any lag < 1 or the array is not 1-dimensional.
        TypeError: If lags is not in an accepted format for the forecaster,
            or the array contains non-integer values.

    Examples:
        >>> import numpy as np
        >>> lags, names, max_lag = initialize_lags("ForecasterRecursive", 3)
        >>> lags
        array([1, 2, 3])
        >>> names
        ['lag_1', 'lag_2', 'lag_3']
        >>> max_lag
        3
        >>> lags, names, max_lag = initialize_lags("ForecasterRecursive", [1, 3, 5])
        >>> lags
        array([1, 3, 5])
        >>> names
        ['lag_1', 'lag_3', 'lag_5']
        >>> lags, names, max_lag = initialize_lags("ForecasterRecursive", None)
        >>> lags is None
        True
        >>> try:
        ...     initialize_lags("ForecasterRecursive", 0)
        ... except ValueError as e:
        ...     print("Error: Minimum value of lags allowed is 1")
        Error: Minimum value of lags allowed is 1
    """
    lags_names = None
    max_lag = None

    if lags is not None:
        # Accept numpy integer scalars (e.g. np.int64) as well as plain ints.
        if isinstance(lags, (int, np.integer)):
            if lags < 1:
                raise ValueError("Minimum value of lags allowed is 1.")
            lags = np.arange(1, int(lags) + 1)

        if isinstance(lags, (list, tuple, range)):
            lags = np.array(lags)

        if isinstance(lags, np.ndarray):
            if lags.size == 0:
                # Empty specification behaves like `lags=None`.
                return None, None, None
            if lags.ndim != 1:
                raise ValueError("`lags` must be a 1-dimensional array.")
            if not np.issubdtype(lags.dtype, np.integer):
                raise TypeError("All values in `lags` must be integers.")
            if np.any(lags < 1):
                raise ValueError("Minimum value of lags allowed is 1.")
        else:
            if forecaster_name == "ForecasterDirectMultiVariate":
                raise TypeError(
                    f"`lags` argument must be a dict, int, 1d numpy ndarray, range, "
                    f"tuple or list. Got {type(lags)}."
                )
            else:
                raise TypeError(
                    f"`lags` argument must be an int, 1d numpy ndarray, range, "
                    f"tuple or list. Got {type(lags)}."
                )

        lags = np.sort(lags)
        lags_names = [f"lag_{i}" for i in lags]
        max_lag = int(max(lags))

    return lags, lags_names, max_lag

initialize_transformer_series(forecaster_name, series_names_in_, encoding=None, transformer_series=None)

Initialize transformer_series_ attribute for multivariate/multiseries forecasters.

Creates a dictionary of transformers for each time series in multivariate or multiseries forecasting. Handles three cases: no transformation (None), same transformer for all series (single object), or different transformers per series (dictionary). Clones transformer objects to avoid overwriting.

Parameters:

Name Type Description Default
forecaster_name str

Name of the forecaster using this function. Special handling is applied for 'ForecasterRecursiveMultiSeries'.

required
series_names_in_ list[str]

Names of the time series (levels) used during training. These will be the keys in the returned transformer dictionary.

required
encoding str | None

Encoding used to identify different series. Only used for ForecasterRecursiveMultiSeries. If None, creates a single '_unknown_level' entry. Defaults to None.

None
transformer_series object | dict[str, object | None] | None

Transformer(s) to apply to series. Can be: - None: No transformation applied - Single transformer object: Same transformer cloned for all series - Dict mapping series names to transformers: Different transformer per series Defaults to None.

None

Returns:

Name Type Description
dict dict[str, object | None]

Dictionary with series names as keys and transformer objects (or None) as values. Transformers are cloned to prevent overwriting.

Warns:

Type Description
IgnoredArgumentWarning

If transformer_series is a dict and some series_names_in_ are not present in the dict keys (those series get no transformation).

Examples:

No transformation:

>>> from spotforecast2_safe.forecaster.utils import initialize_transformer_series
>>> series = ['series1', 'series2', 'series3']
>>> result = initialize_transformer_series(
...     forecaster_name='ForecasterDirectMultiVariate',
...     series_names_in_=series,
...     transformer_series=None
... )
>>> print(result)
{'series1': None, 'series2': None, 'series3': None}

Same transformer for all series:

>>> from sklearn.preprocessing import StandardScaler
>>> scaler = StandardScaler()
>>> result = initialize_transformer_series(
...     forecaster_name='ForecasterDirectMultiVariate',
...     series_names_in_=['series1', 'series2'],
...     transformer_series=scaler
... )
>>> len(result)
2
>>> all(isinstance(v, StandardScaler) for v in result.values())
True
>>> result['series1'] is result['series2']  # Different clones
False

Different transformer per series:

>>> from sklearn.preprocessing import MinMaxScaler
>>> transformers = {
...     'series1': StandardScaler(),
...     'series2': MinMaxScaler()
... }
>>> result = initialize_transformer_series(
...     forecaster_name='ForecasterDirectMultiVariate',
...     series_names_in_=['series1', 'series2'],
...     transformer_series=transformers
... )
>>> isinstance(result['series1'], StandardScaler)
True
>>> isinstance(result['series2'], MinMaxScaler)
True
Source code in src/spotforecast2_safe/forecaster/utils.py
def initialize_transformer_series(
    forecaster_name: str,
    series_names_in_: list[str],
    encoding: str | None = None,
    transformer_series: object | dict[str, object | None] | None = None,
) -> dict[str, object | None]:
    """Build the per-series transformer mapping for multiseries forecasters.

    Produces one entry per series name, covering three input forms: no
    transformation (``None``), a single transformer applied to every series
    (cloned per series), or an explicit dict with one transformer per series
    (deep-copied). Cloning/copying prevents a shared transformer from being
    fitted on several series at once.

    Args:
        forecaster_name: Name of the forecaster using this function. Special
            handling is applied for 'ForecasterRecursiveMultiSeries', which
            gets an extra '_unknown_level' entry.
        series_names_in_: Names of the time series (levels) used during
            training; these become the keys of the returned dictionary.
        encoding: Encoding used to identify different series. Only relevant
            for ForecasterRecursiveMultiSeries: when None, the mapping
            collapses to a single '_unknown_level' entry. Defaults to None.
        transformer_series: Transformer(s) to apply to the series. Either
            None, a single transformer object, or a dict mapping series names
            to transformers (or None). Defaults to None.

    Returns:
        dict: Series name -> transformer object (or None). Transformers are
        cloned/copied so the caller's objects are never fitted in place.

    Warnings:
        IgnoredArgumentWarning: If `transformer_series` is a dict and some
            entries of `series_names_in_` are missing from its keys; those
            series receive no transformation.
    """
    from copy import deepcopy
    from sklearn.base import clone
    from spotforecast2_safe.exceptions import IgnoredArgumentWarning

    # ForecasterRecursiveMultiSeries reserves an extra '_unknown_level' key;
    # without an encoding it is the only level that exists.
    if forecaster_name == "ForecasterRecursiveMultiSeries":
        series_names_in_ = (
            ["_unknown_level"]
            if encoding is None
            else series_names_in_ + ["_unknown_level"]
        )

    if isinstance(transformer_series, dict):
        # Start with "no transformation" everywhere, then fill in only the
        # keys that actually belong to the training series.
        transformer_series_: dict[str, object | None] = dict.fromkeys(
            series_names_in_, None
        )
        for series_name, transformer in transformer_series.items():
            if series_name in transformer_series_:
                transformer_series_[series_name] = deepcopy(transformer)

        series_not_in_transformer_series = (
            set(series_names_in_) - set(transformer_series)
        ) - {"_unknown_level"}
        if series_not_in_transformer_series:
            warnings.warn(
                f"{series_not_in_transformer_series} not present in `transformer_series`."
                f" No transformation is applied to these series.",
                IgnoredArgumentWarning,
            )
    elif transformer_series is None:
        transformer_series_ = dict.fromkeys(series_names_in_, None)
    else:
        # Single transformer: give every series its own unfitted clone.
        transformer_series_ = {
            series_name: clone(transformer_series)
            for series_name in series_names_in_
        }

    return transformer_series_

initialize_weights(forecaster_name, estimator, weight_func, series_weights)

Validate and initialize weight function configuration for forecasting.

This function validates weight_func and series_weights, extracts source code from weight functions for serialization, and checks if the estimator supports sample weights in its fit method.

Parameters:

Name Type Description Default
forecaster_name str

Name of the forecaster class.

required
estimator Any

Scikit-learn compatible estimator or pipeline.

required
weight_func Any

Weight function specification: - Callable: Single weight function - dict: Dictionary of weight functions (for MultiSeries forecasters) - None: No weighting

required
series_weights Any

Dictionary of series-level weights (for MultiSeries forecasters). - dict: Maps series names to weight values - None: No series weighting

required

Returns:

Type Description
Any

Tuple containing:

  • weight_func (Any): Validated weight function (or None if invalid)
  • source_code_weight_func (Optional[Union[str, dict]]): Source code of weight function(s) for serialization (or None)
  • series_weights (Any): Validated series weights (or None if invalid)

Raises:

Type Description
TypeError

If weight_func is not Callable/dict (depending on forecaster type), or if series_weights is not a dict.

Warns:

Type Description
IgnoredArgumentWarning

If estimator doesn't support sample_weight.

Examples:

>>> import numpy as np
>>> from sklearn.linear_model import Ridge
>>> from spotforecast2_safe.utils.forecaster_config import initialize_weights
>>>
>>> # Simple weight function
>>> def custom_weights(index):
...     return np.ones(len(index))
>>>
>>> estimator = Ridge()
>>> wf, source, sw = initialize_weights(
...     "ForecasterRecursive", estimator, custom_weights, None
... )
>>> wf is not None
True
>>> isinstance(source, str)
True
>>>
>>> # No weight function
>>> wf, source, sw = initialize_weights(
...     "ForecasterRecursive", estimator, None, None
... )
>>> wf is None
True
>>> source is None
True
>>>
>>> # Invalid type for non-MultiSeries forecaster
>>> try:
...     initialize_weights("ForecasterRecursive", estimator, "invalid", None)
... except TypeError as e:
...     print("Error: weight_func must be Callable")
Error: weight_func must be Callable
Source code in src/spotforecast2_safe/utils/forecaster_config.py
def initialize_weights(
    forecaster_name: str, estimator: Any, weight_func: Any, series_weights: Any
) -> Tuple[Any, Optional[Union[str, dict]], Any]:
    """
    Validate and initialize weight function configuration for forecasting.

    This function validates weight_func and series_weights, extracts source code
    from weight functions for serialization, and checks if the estimator supports
    sample weights in its fit method.

    Args:
        forecaster_name: Name of the forecaster class.
        estimator: Scikit-learn compatible estimator or pipeline.
        weight_func: Weight function specification:
            - Callable: Single weight function
            - dict: Dictionary of weight functions (for MultiSeries forecasters)
            - None: No weighting
        series_weights: Dictionary of series-level weights (for MultiSeries forecasters).
            - dict: Maps series names to weight values
            - None: No series weighting

    Returns:
        Tuple containing:
        - weight_func: Validated weight function (or None if ignored)
        - source_code_weight_func: Source code of weight function(s) for serialization (or None)
        - series_weights: Validated series weights (or None if ignored)

    Raises:
        TypeError: If weight_func is not Callable/dict (depending on forecaster type),
            or if series_weights is not a dict.

    Warnings:
        IgnoredArgumentWarning: If estimator doesn't support sample_weight.
    """
    import inspect
    import warnings
    from collections.abc import Callable

    # Import IgnoredArgumentWarning if available, otherwise define locally
    # so this function also works standalone.
    try:
        from spotforecast2_safe.exceptions import IgnoredArgumentWarning
    except ImportError:

        class IgnoredArgumentWarning(UserWarning):
            """Warning for ignored arguments."""

            pass

    source_code_weight_func = None

    if weight_func is not None:
        # MultiSeries forecasters may provide one weight function per series.
        if forecaster_name in ["ForecasterRecursiveMultiSeries"]:
            if not isinstance(weight_func, (Callable, dict)):
                raise TypeError(
                    f"Argument `weight_func` must be a Callable or a dict of "
                    f"Callables. Got {type(weight_func)}."
                )
        elif not isinstance(weight_func, Callable):
            raise TypeError(
                f"Argument `weight_func` must be a Callable. Got {type(weight_func)}."
            )

        if isinstance(weight_func, dict):
            source_code_weight_func = {}
            for key in weight_func:
                try:
                    source_code_weight_func[key] = inspect.getsource(weight_func[key])
                except (OSError, TypeError):
                    # OSError: source not available, TypeError: callable class instance
                    source_code_weight_func[key] = (
                        f"<source unavailable: {weight_func[key]!r}>"
                    )
        else:
            try:
                source_code_weight_func = inspect.getsource(weight_func)
            except (OSError, TypeError):
                # OSError: source not available (e.g., built-in, lambda in REPL)
                # TypeError: callable class instance (e.g., WeightFunction)
                # In these cases, we can't get source but the object can still be pickled
                source_code_weight_func = f"<source unavailable: {weight_func!r}>"

        if "sample_weight" not in inspect.signature(estimator.fit).parameters:
            warnings.warn(
                f"Argument `weight_func` is ignored since estimator {estimator} "
                f"does not accept `sample_weight` in its `fit` method.",
                IgnoredArgumentWarning,
            )
            weight_func = None
            source_code_weight_func = None

    if series_weights is not None:
        if not isinstance(series_weights, dict):
            # Fixed: the original message concatenated "ints." and "Got"
            # without a separating space.
            raise TypeError(
                f"Argument `series_weights` must be a dict of floats or ints. "
                f"Got {type(series_weights)}."
            )
        if "sample_weight" not in inspect.signature(estimator.fit).parameters:
            warnings.warn(
                f"Argument `series_weights` is ignored since estimator {estimator} "
                f"does not accept `sample_weight` in its `fit` method.",
                IgnoredArgumentWarning,
            )
            series_weights = None

    return weight_func, source_code_weight_func, series_weights

initialize_window_features(window_features)

Check window_features argument input and generate the corresponding list.

This function validates window feature objects and extracts their metadata, ensuring they have the required attributes (window_sizes, features_names) and methods (transform_batch, transform) for proper forecasting operations.

Parameters:

Name Type Description Default
window_features Any

Classes used to create window features. Can be a single object or a list of objects. Each object must have window_sizes, features_names attributes and transform_batch, transform methods.

required

Returns:

Name Type Description
tuple Tuple[Optional[List[object]], Optional[List[str]], Optional[int]]

A tuple containing: - window_features (list or None): List of classes used to create window features. - window_features_names (list or None): List with all the features names of the window features. - max_size_window_features (int or None): Maximum value of the window_sizes attribute of all classes.

Raises:

Type Description
ValueError

If window_features is an empty list.

ValueError

If a window feature is missing required attributes or methods.

TypeError

If window_sizes or features_names have incorrect types.

Examples:

>>> from spotforecast2_safe.forecaster.preprocessing import RollingFeatures
>>> wf = RollingFeatures(stats=['mean', 'std'], window_sizes=[7, 14])
>>> wf_list, names, max_size = initialize_window_features(wf)
>>> print(f"Max window size: {max_size}")
Max window size: 14
>>> print(f"Number of features: {len(names)}")
Number of features: 4

Multiple window features:

>>> wf1 = RollingFeatures(stats=['mean'], window_sizes=7)
>>> wf2 = RollingFeatures(stats=['max', 'min'], window_sizes=3)
>>> wf_list, names, max_size = initialize_window_features([wf1, wf2])
>>> print(f"Max window size: {max_size}")
Max window size: 7
Source code in src/spotforecast2_safe/forecaster/utils.py
def initialize_window_features(
    window_features: Any,
) -> Tuple[Optional[List[object]], Optional[List[str]], Optional[int]]:
    """Validate `window_features` input and collect its metadata.

    Every window-feature object must expose the attributes `window_sizes` and
    `features_names` and the methods `transform_batch` and `transform`. The
    input is normalized to a list and the combined feature names plus the
    largest window size are extracted.

    Args:
        window_features: A single window-feature object, a list of such
            objects, or None when no window features are used.

    Returns:
        tuple: A tuple containing:
            - window_features (list or None): Normalized list of window-feature objects.
            - window_features_names (list or None): All feature names, in order.
            - max_size_window_features (int or None): Largest `window_sizes` value found.

    Raises:
        ValueError: If `window_features` is an empty list, an object lacks a
            required attribute/method, a window size is smaller than 1, or
            feature names are not unique.
        TypeError: If `window_sizes` or `features_names` have invalid types.
    """

    needed_atts = ["window_sizes", "features_names"]
    needed_methods = ["transform_batch", "transform"]

    if window_features is None:
        return None, None, None

    if isinstance(window_features, list) and len(window_features) < 1:
        raise ValueError(
            "Argument `window_features` must contain at least one element."
        )
    if not isinstance(window_features, list):
        window_features = [window_features]

    link_to_docs = (
        "\nVisit the documentation for more information about how to create "
        "custom window features."
    )

    required_atts = set(needed_atts)
    required_methods = set(needed_methods)
    per_feature_max_sizes: List[int] = []
    collected_names: List[str] = []

    for feature in window_features:
        feature_cls = type(feature).__name__
        available = set(dir(feature))

        # Interface checks: attributes first, then methods (order matters
        # only for which error message the caller sees first).
        if not required_atts.issubset(available):
            raise ValueError(
                f"{feature_cls} must have the attributes: {needed_atts}." + link_to_docs
            )
        if not required_methods.issubset(available):
            raise ValueError(
                f"{feature_cls} must have the methods: {needed_methods}." + link_to_docs
            )

        sizes = feature.window_sizes
        if not isinstance(sizes, (int, list)):
            raise TypeError(
                f"Attribute `window_sizes` of {feature_cls} must be an int or a list "
                f"of ints. Got {type(sizes)}." + link_to_docs
            )

        if isinstance(sizes, int):
            if sizes < 1:
                raise ValueError(
                    f"If argument `window_sizes` is an integer, it must be equal to or "
                    f"greater than 1. Got {sizes} from {feature_cls}."
                    + link_to_docs
                )
            per_feature_max_sizes.append(sizes)
        else:
            if not all(isinstance(s, int) for s in sizes) or not all(
                s >= 1 for s in sizes
            ):
                raise ValueError(
                    f"If argument `window_sizes` is a list, all elements must be integers "
                    f"equal to or greater than 1. Got {sizes} from {feature_cls}."
                    + link_to_docs
                )
            per_feature_max_sizes.append(max(sizes))

        names = feature.features_names
        if not isinstance(names, (str, list)):
            raise TypeError(
                f"Attribute `features_names` of {feature_cls} must be a str or "
                f"a list of strings. Got {type(names)}." + link_to_docs
            )
        if isinstance(names, str):
            collected_names.append(names)
        else:
            if not all(isinstance(n, str) for n in names):
                raise TypeError(
                    f"If argument `features_names` is a list, all elements "
                    f"must be strings. Got {names} from {feature_cls}."
                    + link_to_docs
                )
            collected_names.extend(names)

    max_size_window_features = max(per_feature_max_sizes)
    if len(set(collected_names)) != len(collected_names):
        raise ValueError(
            f"All window features names must be unique. Got {collected_names}."
        )

    return window_features, collected_names, max_size_window_features

input_to_frame(data, input_name)

Convert input data to a pandas DataFrame.

This function ensures consistent DataFrame format for internal processing. If data is already a DataFrame, it's returned as-is. If it's a Series, it's converted to a single-column DataFrame.

Parameters:

Name Type Description Default
data Union[Series, DataFrame]

Input data as pandas Series or DataFrame.

required
input_name str

Name of the input data type. Accepted values are: - 'y': Target time series - 'last_window': Last window for prediction - 'exog': Exogenous variables

required

Returns:

Type Description
DataFrame

DataFrame version of the input data. For Series input, uses the series

DataFrame

name if available, otherwise uses a default name based on input_name.

Examples:

>>> import pandas as pd
>>> from spotforecast2_safe.utils.data_transform import input_to_frame
>>>
>>> # Series with name
>>> y = pd.Series([1, 2, 3], name="sales")
>>> df = input_to_frame(y, input_name="y")
>>> df.columns.tolist()
['sales']
>>>
>>> # Series without name (uses default)
>>> y_no_name = pd.Series([1, 2, 3])
>>> df = input_to_frame(y_no_name, input_name="y")
>>> df.columns.tolist()
['y']
>>>
>>> # DataFrame (returned as-is)
>>> df_input = pd.DataFrame({"temp": [20, 21], "humidity": [50, 55]})
>>> df_output = input_to_frame(df_input, input_name="exog")
>>> df_output.columns.tolist()
['temp', 'humidity']
>>>
>>> # Exog series without name
>>> exog = pd.Series([10, 20, 30])
>>> df_exog = input_to_frame(exog, input_name="exog")
>>> df_exog.columns.tolist()
['exog']
Source code in src/spotforecast2_safe/utils/data_transform.py
def input_to_frame(
    data: Union[pd.Series, pd.DataFrame], input_name: str
) -> pd.DataFrame:
    """
    Return the input data as a pandas DataFrame.

    A DataFrame passes through unchanged; a Series is promoted to a
    one-column DataFrame. The column keeps the series' own name when set,
    otherwise a default derived from `input_name` is used.

    Args:
        data: Input data as pandas Series or DataFrame.
        input_name: Kind of input; one of 'y' (target series),
            'last_window' (prediction window) or 'exog' (exogenous
            variables). Determines the fallback column name.

    Returns:
        DataFrame version of the input data.
    """
    # Fallback column names for unnamed Series, keyed by input kind.
    default_names = {"y": "y", "last_window": "y", "exog": "exog"}

    if not isinstance(data, pd.Series):
        return data

    column_name = data.name if data.name is not None else default_names[input_name]
    return data.to_frame(name=column_name)

predict_multivariate(forecasters, steps_ahead, exog=None, show_progress=False)

Generate multi-output predictions using multiple baseline forecasters.

Parameters:

Name Type Description Default
forecasters dict

Dictionary of fitted forecaster instances (one per target). Keys are target names, values are the fitted forecasters (e.g., ForecasterRecursive, ForecasterEquivalentDate).

required
steps_ahead int

Number of steps to forecast.

required
exog DataFrame

Exogenous variables for prediction. If provided, will be passed to each forecaster's predict method.

None
show_progress bool

Show progress bar while predicting per target forecaster. Default: False.

False

Returns:

Type Description
DataFrame

pd.DataFrame: DataFrame with predictions for all targets.

Examples:

>>> import pandas as pd
>>> from sklearn.linear_model import LinearRegression
>>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
>>> from spotforecast2_safe.forecaster.utils import predict_multivariate
>>> y1 = pd.Series([1, 2, 3, 4, 5])
>>> y2 = pd.Series([2, 4, 6, 8, 10])
>>> f1 = ForecasterRecursive(estimator=LinearRegression(), lags=2)
>>> f2 = ForecasterRecursive(estimator=LinearRegression(), lags=2)
>>> f1.fit(y=y1)
>>> f2.fit(y=y2)
>>> forecasters = {'target1': f1, 'target2': f2}
>>> predictions = predict_multivariate(forecasters, steps_ahead=2)
>>> predictions
   target1  target2
5      6.0     12.0
6      7.0     14.0
Source code in src/spotforecast2_safe/forecaster/utils.py
def predict_multivariate(
    forecasters: dict[str, Any],
    steps_ahead: int,
    exog: pd.DataFrame | None = None,
    show_progress: bool = False,
) -> pd.DataFrame:
    """
    Generate multi-output predictions using multiple baseline forecasters.

    Args:
        forecasters (dict): Dictionary of fitted forecaster instances (one per target).
            Keys are target names, values are the fitted forecasters (e.g.,
            ForecasterRecursive, ForecasterEquivalentDate).
        steps_ahead (int): Number of steps to forecast.
        exog (pd.DataFrame, optional): Exogenous variables for prediction.
            If provided, will be passed to each forecaster's predict method.
        show_progress (bool, optional): Show progress bar while predicting
            per target forecaster. Default: False.

    Returns:
        pd.DataFrame: DataFrame with predictions for all targets.

    Examples:
        >>> import pandas as pd
        >>> from sklearn.linear_model import LinearRegression
        >>> from spotforecast2_safe.forecaster.recursive import ForecasterRecursive
        >>> from spotforecast2_safe.forecaster.utils import predict_multivariate
        >>> y1 = pd.Series([1, 2, 3, 4, 5])
        >>> y2 = pd.Series([2, 4, 6, 8, 10])
        >>> f1 = ForecasterRecursive(estimator=LinearRegression(), lags=2)
        >>> f2 = ForecasterRecursive(estimator=LinearRegression(), lags=2)
        >>> f1.fit(y=y1)
        >>> f2.fit(y=y2)
        >>> forecasters = {'target1': f1, 'target2': f2}
        >>> predictions = predict_multivariate(forecasters, steps_ahead=2)
        >>> predictions
           target1  target2
        5      6.0     12.0
        6      7.0     14.0
    """

    if not forecasters:
        return pd.DataFrame()

    predictions = {}

    target_iter = forecasters.items()
    if show_progress and tqdm is not None:
        target_iter = tqdm(
            forecasters.items(),
            desc="Predicting targets",
            unit="model",
        )

    for target, forecaster in target_iter:
        # Generate predictions for this target
        if exog is not None:
            pred = forecaster.predict(steps=steps_ahead, exog=exog)
        else:
            pred = forecaster.predict(steps=steps_ahead)
        predictions[target] = pred

    # Combine into a single DataFrame
    return pd.concat(predictions, axis=1)

prepare_steps_direct(max_step, steps=None)

Prepare list of steps to be predicted in Direct Forecasters.

Parameters:

Name Type Description Default
max_step int | list[int] | ndarray

int, list, numpy ndarray Maximum number of future steps the forecaster will predict when using predict methods.

required
steps int | list[int] | None

int, list, None, default None Predict n steps. The value of steps must be less than or equal to the value of steps defined when initializing the forecaster. Starts at 1.

  • If int: Only steps within the range of 1 to int are predicted.
  • If list: List of ints. Only the steps contained in the list are predicted.
  • If None: As many steps are predicted as were defined at initialization.
None

Returns:

Type Description
list[int]

list[int]: Steps to be predicted.

Examples:

>>> from spotforecast2_safe.forecaster.utils import prepare_steps_direct
>>> prepare_steps_direct(max_step=5, steps=3)
[1, 2, 3]

>>> prepare_steps_direct(max_step=5, steps=[1, 3, 5])
[1, 3, 5]

>>> prepare_steps_direct(max_step=5, steps=None)
[1, 2, 3, 4, 5]

Source code in src/spotforecast2_safe/forecaster/utils.py
def prepare_steps_direct(
    max_step: int | list[int] | np.ndarray, steps: int | list[int] | None = None
) -> list[int]:
    """
    Prepare list of steps to be predicted in Direct Forecasters.

    Args:
        max_step: int, list, numpy ndarray
            Maximum number of future steps the forecaster will predict
            when using predict methods.
        steps: int, list, None, default None
            Predict n steps. The value of `steps` must be less than or equal to the
            value of steps defined when initializing the forecaster. Starts at 1.

            - If `int`: Only steps within the range of 1 to int are predicted.
            - If `list`: List of ints. Only the steps contained in the list
              are predicted.
            - If `None`: As many steps are predicted as were defined at
              initialization.

    Returns:
        list[int]:
            Steps to be predicted.

    Examples:
        from spotforecast2_safe.forecaster.utils import prepare_steps_direct
        max_step = 5
        steps = 3
        steps_direct = prepare_steps_direct(max_step, steps)
        print(steps_direct)
        [1, 2, 3]

        max_step = 5
        steps = [1, 3, 5]
        steps_direct = prepare_steps_direct(max_step, steps)
        print(steps_direct)
        [1, 3, 5]

        max_step = 5
        steps = None
        steps_direct = prepare_steps_direct(max_step, steps)
        print(steps_direct)
        [1, 2, 3, 4, 5]
    """

    if isinstance(steps, int):
        steps_direct = list(range(1, steps + 1))
    elif steps is None:
        if isinstance(max_step, int):
            steps_direct = list(range(1, max_step + 1))
        else:
            steps_direct = [int(s) for s in max_step]
    elif isinstance(steps, list):
        steps_direct = []
        for step in steps:
            if not isinstance(step, (int, np.integer)):
                raise TypeError(
                    f"`steps` argument must be an int, a list of ints or `None`. "
                    f"Got {type(steps)}."
                )
            steps_direct.append(int(step))

    return steps_direct

select_n_jobs_fit_forecaster(forecaster_name, estimator)

Select the number of jobs to run in parallel during the fit process.

This function determines the optimal number of parallel processes for fitting the forecaster based on the available system resources. In safety-critical environments, this helps manage computational load and ensures system predictability.

Parameters:

Name Type Description Default
forecaster_name str

Name of the forecaster being fitted. Currently unused but reserved for granular resource allocation based on model complexity.

required
estimator object

The estimator object being used by the forecaster. Currently unused but reserved for checking if the estimator itself supports internal parallelism.

required

Returns:

Type Description
int

The number of jobs (CPUs) to use for parallel processing. Defaults to

int

the system CPU count, with a fallback to 1 if the count cannot be

int

determined.

Source code in src/spotforecast2_safe/forecaster/utils.py
def select_n_jobs_fit_forecaster(forecaster_name: str, estimator: object) -> int:
    """Select the number of jobs to run in parallel during the fit process.

    The degree of parallelism is taken from the system CPU count, falling
    back to 1 when the count cannot be determined. In safety-critical
    environments this helps manage computational load and keeps system
    behavior predictable.

    Args:
        forecaster_name: Name of the forecaster being fitted. Currently unused
            but reserved for granular resource allocation based on model
            complexity.
        estimator: The estimator object used by the forecaster. Currently
            unused but reserved for checking whether the estimator itself
            supports internal parallelism.

    Returns:
        The number of jobs (CPUs) to use for parallel processing.
    """
    import os

    n_cpus = os.cpu_count()
    # os.cpu_count() may return None on platforms where the count is unknown.
    return n_cpus if n_cpus else 1

set_skforecast_warnings(suppress_warnings, action='ignore')

Suppress spotforecast warnings.

Parameters:

Name Type Description Default
suppress_warnings bool

bool If True, spotforecast warnings will be suppressed.

required
action str

str, default 'ignore' Action to take regarding the warnings.

'ignore'
Source code in src/spotforecast2_safe/exceptions.py
def set_skforecast_warnings(suppress_warnings: bool, action: str = "ignore") -> None:
    """
    Suppress spotforecast warnings.

    Args:
        suppress_warnings: bool
            If True, spotforecast warnings will be suppressed.
        action: str, default 'ignore'
            Action to take regarding the warnings.
    """
    # Guard clause: nothing to do unless suppression was requested.
    if not suppress_warnings:
        return
    for category in warn_skforecast_categories:
        warnings.simplefilter(action, category=category)

transform_dataframe(df, transformer, fit=False, inverse_transform=False)

Transform raw values of pandas DataFrame with a scikit-learn alike transformer, preprocessor or ColumnTransformer.

The transformer used must have the following methods: fit, transform, fit_transform and inverse_transform. ColumnTransformers are not allowed since they do not have inverse_transform method.

Parameters:

Name Type Description Default
df DataFrame

DataFrame to be transformed.

required
transformer object

Scikit-learn alike transformer, preprocessor, or ColumnTransformer. Must implement fit, transform, fit_transform and inverse_transform.

required
fit bool

Train the transformer before applying it. Defaults to False.

False
inverse_transform bool

Transform back the data to the original representation. This is not available when using transformers of class scikit-learn ColumnTransformers. Defaults to False.

False

Returns:

Type Description
DataFrame

Transformed DataFrame.

Raises:

Type Description
TypeError

If df is not a pandas DataFrame.

ValueError

If inverse_transform is requested for ColumnTransformer.

Source code in src/spotforecast2_safe/utils/data_transform.py
def transform_dataframe(
    df: pd.DataFrame,
    transformer: object,
    fit: bool = False,
    inverse_transform: bool = False,
) -> pd.DataFrame:
    """
    Transform raw values of pandas DataFrame with a scikit-learn alike
    transformer, preprocessor or ColumnTransformer.

    The transformer used must have the following methods: fit, transform,
    fit_transform and inverse_transform. ColumnTransformers are not allowed
    since they do not have inverse_transform method.

    Args:
        df: DataFrame to be transformed.
        transformer: Scikit-learn alike transformer, preprocessor, or ColumnTransformer.
            Must implement fit, transform, fit_transform and inverse_transform.
            If None, `df` is returned unchanged.
        fit: Train the transformer before applying it. Defaults to False.
        inverse_transform: Transform back the data to the original representation.
            This is not available when using transformers of class
            scikit-learn ColumnTransformers. Defaults to False.

    Returns:
        Transformed DataFrame.

    Raises:
        TypeError: If df is not a pandas DataFrame.
        ValueError: If inverse_transform is requested for ColumnTransformer.
    """
    if not isinstance(df, pd.DataFrame):
        raise TypeError(f"`df` argument must be a pandas DataFrame. Got {type(df)}")

    if transformer is None:
        return df

    # Check for ColumnTransformer by class name to avoid importing sklearn
    is_column_transformer = type(
        transformer
    ).__name__ == "ColumnTransformer" or hasattr(transformer, "transformers")

    if inverse_transform and is_column_transformer:
        raise ValueError(
            "`inverse_transform` is not available when using ColumnTransformers."
        )

    if not inverse_transform:
        if fit:
            values_transformed = transformer.fit_transform(df)
        else:
            values_transformed = transformer.transform(df)
    else:
        values_transformed = transformer.inverse_transform(df)

    if hasattr(values_transformed, "toarray"):
        # If the returned values are in sparse matrix format, it is converted to dense
        values_transformed = values_transformed.toarray()

    if isinstance(values_transformed, pd.DataFrame):
        return values_transformed

    # Fix: transformers such as encoders may change the number of output
    # columns; blindly reusing `df.columns` then raises a ValueError. Reuse
    # the original labels only when the width is unchanged, otherwise fall
    # back to the transformer's reported feature names or positional labels.
    shape = getattr(values_transformed, "shape", None)
    n_out = shape[1] if shape is not None and len(shape) == 2 else df.shape[1]
    if n_out == df.shape[1]:
        columns = df.columns
    elif hasattr(transformer, "get_feature_names_out"):
        columns = transformer.get_feature_names_out()
    else:
        columns = pd.RangeIndex(n_out)

    return pd.DataFrame(values_transformed, index=df.index, columns=columns)

transform_numpy(array, transformer, fit=False, inverse_transform=False)

Transform raw values of a numpy ndarray with a scikit-learn alike transformer, preprocessor or ColumnTransformer. The transformer used must have the following methods: fit, transform, fit_transform and inverse_transform. ColumnTransformers are not allowed since they do not have inverse_transform method.

Parameters:

Name Type Description Default
array ndarray

numpy ndarray Array to be transformed.

required
transformer object | None

scikit-learn alike transformer, preprocessor, or ColumnTransformer. Scikit-learn alike transformer (preprocessor) with methods: fit, transform, fit_transform and inverse_transform.

required

fit: bool, default False Train the transformer before applying it. inverse_transform: bool, default False Transform back the data to the original representation. This is not available when using transformers of class scikit-learn ColumnTransformers.

Returns:

Type Description
ndarray

numpy ndarray: Transformed array.

Raises:

Type Description
TypeError

If array is not a numpy ndarray.

TypeError

If transformer is not a scikit-learn alike transformer, preprocessor, or ColumnTransformer.

ValueError

If inverse_transform is True and transformer is a ColumnTransformer.

Examples:

from spotforecast2_safe.forecaster.utils import transform_numpy from sklearn.preprocessing import StandardScaler import numpy as np array = np.array([[1, 2], [3, 4], [5, 6]]) transformer = StandardScaler() array_transformed = transform_numpy(array, transformer, fit=True) print(array_transformed) [[-1.22474487 -1.22474487] [ 0. 0. ] [ 1.22474487 1.22474487]] array_inversed = transform_numpy(array_transformed, transformer, inverse_transform=True) print(array_inversed) [[1. 2.] [3. 4.] [5. 6.]]

Source code in src/spotforecast2_safe/forecaster/utils.py
def transform_numpy(
    array: np.ndarray,
    transformer: object | None,
    fit: bool = False,
    inverse_transform: bool = False,
) -> np.ndarray:
    """
    Transform raw values of a numpy ndarray with a scikit-learn alike
    transformer, preprocessor or ColumnTransformer. The transformer used must
    have the following methods: fit, transform, fit_transform and
    inverse_transform. ColumnTransformers are not allowed since they do not
    have inverse_transform method.

    Args:
        array: numpy ndarray
            Array to be transformed.
        transformer: scikit-learn alike transformer, preprocessor, or ColumnTransformer.
            Scikit-learn alike transformer (preprocessor) with methods: fit, transform,
            fit_transform and inverse_transform.
    fit: bool, default False
        Train the transformer before applying it.
    inverse_transform: bool, default False
        Transform back the data to the original representation. This is not available
        when using transformers of class scikit-learn ColumnTransformers.

    Returns:
        numpy ndarray: Transformed array.

    Raises:
        TypeError: If `array` is not a numpy ndarray.
        TypeError: If `transformer` is not a scikit-learn alike transformer, preprocessor, or ColumnTransformer.
        ValueError: If `inverse_transform` is True and `transformer` is a ColumnTransformer.

    Examples:
        ffrom spotforecast2_safe.forecaster.utils import transform_numpy
        from sklearn.preprocessing import StandardScaler
        import numpy as np
        array = np.array([[1, 2], [3, 4], [5, 6]])
        transformer = StandardScaler()
        array_transformed = transform_numpy(array, transformer, fit=True)
        print(array_transformed)
        [[-1.22474487 -1.22474487]
         [ 0.          0.        ]
         [ 1.22474487  1.22474487]]
         array_inversed = transform_numpy(array_transformed, transformer, inverse_transform=True)
         print(array_inversed)
         [[1. 2.]
          [3. 4.]
          [5. 6.]]
    """

    if transformer is None:
        return array

    if not isinstance(array, np.ndarray):
        raise TypeError(f"`array` argument must be a numpy ndarray. Got {type(array)}")

    original_ndim = array.ndim
    original_shape = array.shape
    reshaped_for_inverse = False

    if original_ndim == 1:
        array = array.reshape(-1, 1)

    if inverse_transform and isinstance(transformer, ColumnTransformer):
        raise ValueError(
            "`inverse_transform` is not available when using ColumnTransformers."
        )

    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore",
            message="X does not have valid feature names",
            category=UserWarning,
        )
        if not inverse_transform:
            if fit:
                array_transformed = transformer.fit_transform(array)
            else:
                array_transformed = transformer.transform(array)
        else:
            # Vectorized inverse transformation for 2D arrays with multiple columns.
            # Reshape to single column, transform, and reshape back.
            # This is faster than applying the transformer column by column.
            if array.shape[1] > 1:
                array = array.reshape(-1, 1)
                reshaped_for_inverse = True
            array_transformed = transformer.inverse_transform(array)

    if hasattr(array_transformed, "toarray"):
        # If the returned values are in sparse matrix format, it is converted to dense
        array_transformed = array_transformed.toarray()

    if isinstance(array_transformed, (pd.Series, pd.DataFrame)):
        array_transformed = array_transformed.to_numpy()

    # Reshape back to original shape only if we reshaped for inverse_transform
    if reshaped_for_inverse:
        array_transformed = array_transformed.reshape(original_shape)

    if original_ndim == 1:
        array_transformed = array_transformed.ravel()

    return array_transformed